# source("E:/UNIVERSITAT/PHD/PHD/PAPERS/PAPER - AID - FDI/SUBMISSIONS/PSRM/PSRM REPLICATION MATERIALS/Replication code_PSRM_Aid_FDI_Personalism.R", echo = T, max.deparse.length = 1000000)


################################################################################
#                                                                              # 
# Foreign Aid, FDI and the Personalization of Power in Autocracies             #
# Bernat Puertas* and Abel Escribà-Folch*+                                     #
# *Universitat Pompeu Fabra                                                    #
# +Institut Barcelona d'Estudis Internacionals                                 #
# Contact: bernat.puertas@upf.edu                                              #
#                                                                              #  
################################################################################



# This script shows the code for reproducing the cleaning and analysis.
# It script should be used together with some .do files (we specify in the code when to you use the .do file).


rm(list=ls())


# Set working directory --------------------------------------------------------

# Change the working directory to the folder where the data is stored.

setwd("E:/UNIVERSITAT/PHD/PHD/PAPERS/PAPER - AID - FDI/SUBMISSIONS/PSRM/PSRM REPLICATION MATERIALS") 



# Create log file --------------------------------------------------------------

sink("PSRM_Aid_FDI_Personalism.log", split = T)



# Load libraries --------------------------------------------------------------

library(tidyverse) # Data manipulation
library(countrycode) # For COW codes
library(readstata13) # Read .dta files
library(DataCombine) # Data manipulation and lags
library(fixest) # FE regression analysis
library(estimatr) # For regression analysis. 
library(modelsummary) # For regression results
library(sandwich) # SE robust to heteroskedasticity
library(clubSandwich) # Clustered SE
library(lmtest) # For robust SE
library(mediation) # For mediation analysis
library(patchwork) # Combining plots
library(tinytex) # For exporting tables 
library(pdftools) # For exporting tables



# Set seed ---------------------------------------------------------------------

set.seed(688648)


# CLEANING ---------------------------------------------------------------------


## GWF -------------------------------------------------------------------------

gwf_pers <- read.csv("GWF_personalism_raw.csv")


gwf_pers <- dplyr::select(gwf_pers, c("cowcode",
                                      "year",
                                      "gwf_country",
                                      "gwf_casename",
                                      "gwf_caseid",
                                      "gwf_case_duration",
                                      "gwf_case_fail",
                                      "gwf_firstldr",
                                      "gwf_leadername",
                                      "gwf_leaderid",
                                      "gwf_leader_duration",
                                      "gwf_leader_fail",
                                      "latent_personalism",
                                      "pers_2pl",
                                      "pers_se_2pl",
                                      "partyexcom_pers",
                                      "partyrbrstmp",
                                      "officepers",
                                      "createparty",
                                      "milnotrial",
                                      "milmerit_persB",
                                      "paramil_pers",
                                      "sectyapp_pers",
                                      "region",
                                      "ld",
                                      "G_age"))


gwf_pers <- rename(gwf_pers, 
                   country = gwf_country,
                   case_name = gwf_casename,
                   case_id = gwf_caseid,
                   case_duration = gwf_case_duration,
                   case_fail = gwf_case_fail,
                   first_leader = gwf_firstldr,
                   leader_name = gwf_leadername,
                   leader_id = gwf_leaderid,
                   leader_duration = gwf_leader_duration,
                   leader_fail = gwf_leader_fail,
                   personalism = latent_personalism,
                   case_duration_log = ld)


gwf_autocr <- read.dta13("GWF_raw.dta")


gwf_autocr <- dplyr::select(gwf_autocr, c("cowcode",
                                          "year",
                                          "period1",
                                          "period2",
                                          "period3",
                                          "period4",
                                          "period5",
                                          "period6",
                                          "period7",
                                          "period8",
                                          "period9",
                                          "period10",
                                          "period11",
                                          "period12",
                                          "coldwar"))



## AID DATA PROJECT ------------------------------------------------------------

aid <- read.csv("AidData Project_raw.csv")


# Select some variables from the df

aid <- dplyr::select(aid, c("year",
                            "donor",
                            "donor_iso", 
                            "recipient",
                            "recipient_iso",
                            "short_description", 
                            "aiddata_purpose_code",
                            "aiddata_purpose_name",
                            "aiddata_activity_codes", 
                            "aiddata_activity_names",
                            "flow_name",
                            "commitment_amount_usd_constant", 
                            "commitment_amount_usd_current",
                            "additional_info"))


aid <- aid %>%
  mutate(cowcode = countrycode(sourcevar = recipient_iso,
                               origin = "iso2c",
                               destination = "cown")) 


# ISO = RS --> Serbia doesn't match with any cowcode. Serbia cowcode = 345 (same of Yugoslavia). Done
# ISO = CE --> Czech Republic. Not a problem because the country-year observations in GWF end in 1989. 
# ISO = CS --> Serbia and Montenegro. Not a problem because the country-year observations for Serbia in GWF end in 2000.
# ISO = SU --> Soviet Union. --> Must add Russia's code = 365. Done.
# ISO = YU --> Yugoslavia, cowcode = 345. Done

write.csv(aid, "AidData Project_noCOW.csv")

# We manually add the (recipient) cowcodes to these cases. 

# Use "Cleaning.do" file in STATA 

# Load again df with (recipient) cowcodes added. 

aid <- read.csv("AidData Project_COW.csv")

aid <- dplyr::select(aid, -c("donor_iso",
                             "recipient_iso"))


# We need to delete observations for some cases that share cowcode but are coded in the same year. Check cases.

# Russia 

aid <- aid[-(1218040:1218057),]

aid <- aid %>% 
  as.data.frame(row.names = 1:nrow(.))

# Yugoslavia

aid <- aid[-(1532682:1532733),]

aid <- aid %>% 
  as.data.frame(row.names = 1:nrow(.))


# Delete observations with cowcode = NA #

aid <- subset(aid, cowcode != "NA")


# Currently, the df is not in TSCS format (some countries have more observations for a given year).
# When merging, this is going to be a problem. 
# We have data on total aid (as ODA) from the WDI. 
# AidData Project's relevance lies in the distinction between economic and democracy aid (see Dietrich and Wright, 2015)
# Instead of aggregating the amount by type (if you summarize the types, almost 95% is ODA_grant, if we remember correctly), which
# means that there are many types of aid that have a small number of observations while ODA is the main one.
# Thus, we'll use ODA from the WDI
# Now, with AidData we can extract more fine-grained information and create the variables of eco_aid and demo_aid
# We'll follow Dietrich and Wright (2015, see Appendix Table A5) to select which purposes to use for each variable:
#   - Democracy aid: 
#       1. Eco. and develop. policy and planning; 
#       2. Public sector financial management.
#       3. Legal and judicial develp.
#       4. Govern. admin. 
#       5. Strength. civil society.
#       6. Conflict prevention...
#       7. Civilian peace-building...
#
#   - Economic aid: rest of purposes, except "refugee in donor countries"


aid2 <- aid %>%
  mutate(type_aid = case_when(aiddata_purpose_code == 15110 |
                                aiddata_purpose_code == 15120 |
                                aiddata_purpose_code == 15130 |
                                aiddata_purpose_code == 15140 |
                                aiddata_purpose_code == 15150 |
                                aiddata_purpose_code == 15205 |
                                aiddata_purpose_code == 15220 ~ "demo_aid",
                              aiddata_purpose_code == 93010 ~ "refugee_aid",
                              is.na(aiddata_purpose_code) ~ NA,
                              TRUE ~ "eco_aid")) %>%
  na.omit(aid2$type_aid) %>%
  group_by(recipient, 
           year, 
           cowcode, 
           type_aid) %>%
  summarise(money = sum(commitment_amount_usd_constant)) %>%
  pivot_wider(names_from = type_aid,
              values_from = money)


# Create ODA variable from AidData Project #

aid3 <- aid %>%
  group_by(recipient, 
           year, 
           flow_name, 
           cowcode) %>%
  summarise(commitment = sum(commitment_amount_usd_constant)) %>%
  group_by(recipient,
           year, 
           flow_name, 
           cowcode) %>%
  pivot_wider(names_from = flow_name,
              values_from = commitment,
              names_prefix = "constant_") %>%
  group_by(year, 
           cowcode) %>%
  mutate(oda_aiddata = sum(sum(`constant_ODA Grants`), 
                           sum(`constant_ODA Loans`))) %>%
  dplyr::select(c("recipient",
                  "year", 
                  "cowcode", 
                  "oda_aiddata"))


aid_final <- merge(aid2, 
                   aid3, 
                   by = c("cowcode", "year"), 
                   all.x = TRUE)


# Keep only vars that we need # 

aid_final <- dplyr::select(aid_final, c("cowcode",
                                        "year",
                                        "recipient.x",
                                        "eco_aid",
                                        "demo_aid",
                                        "oda_aiddata"))

aid_final <- rename(aid_final,
                    recipient = recipient.x)



## WDI -------------------------------------------------------------------------


wdi <- read.csv("WDI_raw.csv")


# Let's first rename the variables into a more easy-writing names

wdi <- rename(wdi, 
              gdppc = GDP.per.capita..constant.2015.US....NY.GDP.PCAP.KD.,
              oda_constant = Net.official.development.assistance.received..constant.2020.US....DT.ODA.ODAT.KD.,
              oda_current = Net.official.development.assistance.received..current.US....DT.ODA.ODAT.CD.,
              fdi_current = Foreign.direct.investment..net.inflows..BoP..current.US....BX.KLT.DINV.CD.WD.,
              fdi_gdp = Foreign.direct.investment..net.inflows....of.GDP...BX.KLT.DINV.WD.GD.ZS.,
              gdppc_growth =GDP.per.capita.growth..annual.....NY.GDP.PCAP.KD.ZG.,
              population = Population..total..SP.POP.TOTL.,
              cpi = Consumer.price.index..2010...100...FP.CPI.TOTL.,
              country = Country.Name,
              country_code = Country.Code,
              year = Time)


wdi <- dplyr::select(wdi, -c("Time.Code"))


# WDI returns a df with ISO3 Character countrycodes. Let's add cowcodes again #


wdi <- wdi %>%
  mutate(cowcode = countrycode(sourcevar = country_code,
                               origin = "iso3c",
                               destination = "cown")) 


# Again, there are some countries/regions for which no cowocode is added. Let's check which ones.
# ABW = Aruba --> Not a problem, not in GWF df.
# ASM = American Samoa --> Not a problem, not in GWF df.
# BMU = Bermuda --> Not a problem, not in GWF df.
# CHI = Channel Island --> Not a problem, not in GWF df.
# CUW = Curaçao --> Not a problem, not in GWF df.
# CYM = Cayman Islands --> Not a problem, not in GWF df.
# FRO = Faroe Islands --> Not a problem, not in GWF df.
# GIB = Gibraltar --> Not a problem, not in GWF df.
# GRL = Greenland --> Not a problem, not in GWF df.
# GUM = Guam --> Not a problem, not in GWF df.
# HKG = Hong Kong --> Not a problem, not in GWF df.
# IMN = Isle of Man --> Not a problem, not in GWF df.
# MAC = Macao --> Not a problem, not in GWF df.
# MAF = Saint Martin (French part) --> Not a problem, not in GWF df.
# MNP = Northen Mariana Islands --> Not a problem, not in GWF df.
# NCL = New Caledonia --> Not a problem, not in GWF df.
# PRI = Puerto Rico --> Not a problem, not in GWF df.
# PSE = Palestine --> Not a problem, not in GWF df.
# PYF = French Polynesia --> Not a problem, not in GWF df.
# SRB = Serbia --> COWCODE = 345
# SXM = Saint Maarten (Dutch part) --> Not a problem, not in GWF df.
# TCA = Turks and Caicos Islands --> Not a problem, not in GWF df.
# VGB = Virgin Islands (British) --> Not a problem, not in GWF df.
# VIR = Virgin Islands (US) --> Not a problem, not in GWF df.
# XKX = Kosovo --> Not a problem, not in GWF df.


# Only cowocode that needs to be added is Serbia (cowcode = 345)
# Let's add it manually. Use "Cleaning.do" file

write.csv(wdi, "WDI_noCOW.csv")


# Once added, let's load it again with all the cowcodes

wdi <- read.csv("WDI_COW.csv")


wdi <- rename(wdi, 
              country = v2, 
              country.code = v3,
              year = v4,
              oda_constant = v5,
              oda_current = v6,
              fdi_gdp = v7,
              fdi_current = v8,
              population = v9,
              gdppc = v10,
              gdppc_growth = v11,
              cpi = v12,
              cowcode = v13)


# Delete 1st var #

wdi <- dplyr::select(wdi, -c("v1",
                             "country.code"))


# Delete first row 

wdi <- wdi[-1,] 


# Delete last rows

wdi <- wdi[-(11068:11073),]


# Delete observations with cowcode = NA

wdi <- subset(wdi, cowcode != "NA") 

# Of course, there are still many countries in the df that are not in GWF, but they have a cowcode.
# Leave them for the moment, and when we merge the dfs won't appear anymore. 


# Character to numeric vars

wdi <- wdi %>%
  mutate(year = as.numeric(year),
         cowcode = as.numeric(cowcode),
         oda_constant = as.numeric(oda_constant),
         oda_current = as.numeric(oda_current),
         fdi_current = as.numeric(fdi_current),
         fdi_gdp = as.numeric(fdi_gdp),
         population = as.numeric(population),
         gdppc_growth = as.numeric(gdppc_growth),
         gdppc = as.numeric(gdppc),
         cpi = as.numeric(cpi))


# Base year of CPI = 100, let's scale it to 1

wdi$cpi = wdi$cpi/100 # Rescale cpi so we have the base year = 1, not 100.


# Let's transform from current to constant USD some vars #

wdi <- wdi %>%
  mutate(oda_cpi = oda_current / cpi,
         fdi_cpi = fdi_current / cpi)


# Let's see how many NAs have each IV 

sum(is.na(wdi$cpi)) # NA = 3432
sum(is.na(wdi$oda_constant)) # NA = 3154
sum(is.na(wdi$oda_cpi)) # NA = 5113
sum(is.na(wdi$oda_current)) # NA = 3154
sum(is.na(wdi$fdi_current)) # NA = 3312
sum(is.na(wdi$fdi_cpi)) # NA = 4422


# Because the cpi has many NAs, when we deflate oda_current and fdi_current,
# we lose many observations.

# After merging we'll see how many NAs have each var because now there are many 
# countries that aren't in the GWF df. 


## EPR -------------------------------------------------------------------------

epr <- read.dta13("EPR_raw.dta")

epr <- dplyr::select(epr, c("year",
                            "cowcode",
                            "country",
                            "popavg",
                            "ongoinghiwarl"))

# Format date form y/m/d --> Year

epr$year <- str_sub(epr$year, 1, 4)



## ROSS ------------------------------------------------------------------------

ross <- read.csv("Ross_raw.csv")

ross <- dplyr::select(ross, c("cty_name",
                              "iso3numeric",
                              "id",
                              "year",
                              "oil_gas_valuePOP_2014"))

ross <- ross %>%
  mutate(cowcode = countrycode(sourcevar = iso3numeric,
                               origin = "iso3n",
                               destination = "cown")) 

# There are several countries that ISO3Numeric numbers do not match cowcodes again.
# Check which ones and add them manually.
# ISO = 200 --> Czechoslovakia, cowcode = 315
# ISO = 230 --> Ethiopia including Eritrea. Criteria = don't use it because observations for Ethiopia and Ethiopia + Eritrea are very equal.
# ISO = 278 --> RDA, cowcode = 265
# ISO = 280 --> RFA. Not a problem because it's not in GWF df. 
# ISO = 688 --> Serbia, cowcode = 345. Observations before independence should be deleted because they match Yugoslavia's cowcode.
# ISO = 714 --> South Vietnam, cowcode = 817
# ISO = 720 --> South Yemen, cowcode = 680
# ISO = 736 --> Sudan including South Sudan. Criteria = because there are observations for Sudan only, I'm gonna use only those. 
# ISO = 810 --> USSR, cowcode = 365. Here I need to delete observations after 1991 because Russia has same cowcode. 
# ISO = 886 --> North Yemen (Yemen Arab Republic), cowcode = 678
# ISO = 890 --> Yugoslavia, cowcode = 345
# ISO = 891 --> Serbia and Montenegro. Not gonna use this, only Serbia. 
# ISO = 991 --> France including Algeria. Not in GWf df. 
# ISO = 997 --> Pakistan including East Pakistan. Same criteria as for Ethiopia + Eritrea and Sudan + South Sudan. 
# ISO = 998 --> North Vietnam, cowcode = 816


write.csv(ross, "Ross_noCOW.csv")

# Use "Cleaning.do" file 

# Load again Ross with cowcodes added. 

ross <- read.csv("Ross_COW.csv")

ross <- dplyr::select(ross, -c("v1",
                               "iso3numeric",
                               "id"))


# Also, there are cases which we need to delete observations before or after a 
# specific year because they didn't exist as such or ceased to exist.
# This could create problems because same cowcode for different cases. 

# Russia --> delete observations before 1992. Same cowcode as USSR.

ross <- ross[-(11455:11514),]

ross <- ross %>% 
  as.data.frame(row.names = 1:nrow(.))

# Serbia -->  delete observations before 1991. Same cowcode as Yugoslavia.

ross <- ross[-(11727:11785),]

ross <- ross %>% 
  as.data.frame(row.names = 1:nrow(.))

# USSR --> delete observations after 1992. Same cowcode as Russia.

ross <- ross[-(12641:12663),]

ross <- ross %>% 
  as.data.frame(row.names = 1:nrow(.))

# Yugoslavia --> delete observations after 1991. Same cowcode as Serbia.

ross <- ross[-(15190:15213),]

ross <- ross %>% 
  as.data.frame(row.names = 1:nrow(.))

# Vietnam --> delete observations of "Vietnam" and keep "North Vietnam" 

ross <- ross[-(14799:14881),]

ross <- ross %>% 
  as.data.frame(row.names = 1:nrow(.))

# Delete observations that have cowcode = NA 

ross <- subset(ross, cowcode != "NA")

# Delete observations before 1946

ross <- subset(ross, year > 1945)

# Delete observations after 2010

ross <- subset(ross, year < 2011)

# Rename variables 

ross <- rename(ross, 
               oilpc = oil_gas_valuepop_2014)



## Military spending -----------------------------------------------------------

# Data of Military spending is from Singer, Bremer and Stuckey (1972), although we
# take it from the Chin et al. (2022, CPS) replication materials.


chinetal <- read.dta13("chin et al 2021.dta")

chinetal <- dplyr::select(chinetal, c("cowcode",
                                      "year",
                                      "nmc_milex"))

chinetal <- rename(chinetal,
                   milex = nmc_milex)



## Merge all -------------------------------------------------------------------

merge1 <- merge(gwf_pers, 
                gwf_autocr, 
                by = c("cowcode", "year"), 
                all.x = TRUE)

merge2 <- merge(merge1, 
                epr, 
                by = c("cowcode", "year"), 
                all.x = TRUE)

merge3 <- merge(merge2,
                ross, 
                by = c("cowcode", "year"), 
                all.x = TRUE)

merge4 <- merge(merge3, 
                aid_final, 
                by = c("cowcode", "year"), 
                all.x = TRUE)

merge5 <- merge(merge4, 
                wdi, 
                by = c("cowcode", "year"), 
                all.x = TRUE)

merge6 <- merge(merge5, 
                chinetal, 
                by = c("cowcode", "year"), 
                all.x = TRUE)



## Clean merged df --------------------------------------------------------

# Delete some variables that duplicate after the merging

merge6 <- dplyr::select(merge6, -c(country.y,
                                   cty_name,
                                   recipient, 
                                   country))


# Rename variables #

merge6 <- rename(merge6,
                 country = country.x,
                 partyexcom = partyexcom_pers,
                 milmerit = milmerit_persB,
                 paramil = paramil_pers,
                 sectyapp = sectyapp_pers,
                 population_epr = popavg,
                 civilwar_lag = ongoinghiwarl)


# Which variable of population has less NAs?
# This is important to know which variable use to create per capita values.

sum(is.na(merge6$population_epr)) # NA = 97
sum(is.na(merge6$population)) # NA = 804

# The per capita values of the variables with "population_epr", which has more observations. Let's use this one.
# For WDI variables, we'll use the population variable to create the per capita values. More consistent. 
# However, we need to rescale population_epr

merge6$population_epr <- merge6$population_epr*1000

# The next pipe does the following:
# First, milex to constant USD. 
# Second, per capita values
# Third, log transformation.


merge6 <- merge6 %>%
  mutate(milex_cpi = milex / cpi) %>%
  mutate(eco_aid_pc = eco_aid / population_epr,
         demo_aid_pc = demo_aid / population_epr,
         oda_aiddata_pc = oda_aiddata / population_epr,
         oda_constant_pc = oda_constant / population,
         oda_current_pc = oda_current / population,
         fdi_current_pc = fdi_current / population,
         oda_cpi_pc = oda_cpi / population,
         fdi_cpi_pc = fdi_cpi / population,
         milex_cpi_pc = milex_cpi / population_epr) %>%
  mutate(leader_duration_log = log(leader_duration + 1),
         population_epr_log = log(population_epr),
         oilpc_log = log(oilpc + 1),
         population_log = log(population),
         gdppc_log = log(gdppc),
         eco_aid_log = log(eco_aid + 1),
         eco_aid_pc_log = log(eco_aid_pc + 1),
         demo_aid_log = log(demo_aid + 1),
         demo_aid_pc_log = log(demo_aid_pc + 1),
         oda_aiddata_log = log(oda_aiddata + 1),
         oda_aiddata_pc_log = log(oda_aiddata_pc + 1),
         milex_cpi_pc_log = log(milex_cpi_pc + 1),
         milex_cpi_log = log(milex_cpi + 1))


# ODA and FDI have negatives values. We need to address it. 

# We do to sets of logarithmic transformation:
# 1. Negative values are kept in negative values. Because the ln of a negative value doesn't exist,
# we multiply by -1 the negative values of the original variable so they become positive numbers.  
# Then, we do the logarithmic transformation adding a constant (+ 1) because the ln(0) = NA. 
# Finally, we multiply again by -1 those numbers that in the original variable were negative so the ln of them is again negative.
# This doesn't change the nature of the variable.

# 2. We transform the negative values to 0. This will be the main transformation we use following many papers. 

# The following pipe creates the ln transformation including and excluding negatives values. 


merge6 <- merge6 %>% 
  mutate(oda_constant_neg_log = ifelse(oda_constant < 0, 
                                       (log(-1*oda_constant + 1))*(-1), 
                                       log(oda_constant + 1)),
         oda_constant_pc_neg_log = ifelse(oda_constant_pc < 0, 
                                          (log(-1*oda_constant_pc + 1))*(-1), 
                                          log(oda_constant_pc + 1)),
         oda_current_neg_log = ifelse(oda_current < 0,
                                      (log(-1*oda_current + 1))*(-1),
                                      log(oda_current + 1)),
         oda_current_pc_neg_log = ifelse(oda_current_pc < 0,
                                         (log(-1*oda_current_pc + 1))*(-1),
                                         log(oda_constant + 1)),
         oda_cpi_neg_log = ifelse(oda_cpi < 0, 
                                  (log(-1*oda_cpi + 1))*(-1), 
                                  log(oda_cpi + 1)),
         oda_cpi_pc_neg_log = ifelse(oda_cpi_pc < 0, 
                                     (log(-1*oda_cpi_pc + 1))*(-1), 
                                     log(oda_cpi_pc + 1)),
         fdi_current_neg_log = ifelse(fdi_current < 0, 
                                      (log(-1*fdi_current + 1))*(-1), 
                                      log(fdi_current + 1)),
         fdi_current_pc_neg_log = ifelse(fdi_current_pc < 0, 
                                         (log(-1*fdi_current_pc + 1))*(-1), 
                                         log(fdi_current_pc + 1)),
         fdi_cpi_neg_log = ifelse(fdi_cpi < 0, 
                                  (log(-1*fdi_cpi + 1))*(-1), 
                                  log(fdi_cpi + 1)),
         fdi_cpi_pc_neg_log = ifelse(fdi_cpi_pc < 0, 
                                     (log(-1*fdi_cpi_pc + 1))*(-1), 
                                     log(fdi_cpi_pc + 1))) %>%
  mutate(oda_constant_pos_log = ifelse(oda_constant < 0, 
                                       0, 
                                       log(oda_constant + 1)),
         oda_constant_pc_pos_log = ifelse(oda_constant_pc < 0, 
                                          0, 
                                          log(oda_constant_pc + 1)),
         oda_current_pos_log = ifelse(oda_current < 0, 
                                      0, 
                                      log(oda_current + 1)),
         oda_current_pc_pos_log = ifelse(oda_current_pc < 0, 
                                         0, 
                                         log(oda_current_pc + 1)),
         oda_cpi_pos_log = ifelse(oda_cpi < 0, 
                                  0, 
                                  log(oda_cpi + 1)),
         oda_cpi_pc_pos_log = ifelse(oda_cpi_pc < 0, 
                                     0, 
                                     log(oda_cpi_pc + 1)),
         fdi_current_pos_log = ifelse(fdi_current < 0, 
                                      0, 
                                      log(fdi_current + 1)),
         fdi_current_pc_pos_log = ifelse(fdi_current_pc < 0, 
                                         0, 
                                         log(fdi_current_pc + 1)),
         fdi_cpi_pos_log = ifelse(fdi_cpi < 0, 
                                  0, 
                                  log(fdi_cpi + 1)),
         fdi_cpi_pc_pos_log = ifelse(fdi_cpi_pc < 0, 
                                     0, 
                                     log(fdi_cpi_pc + 1)),)



# Let's do now the cube root transformation of FDI following Wright and Zhu (2018)
# Good thing about the cube root is that it handles negative values. However, R doesn't give the results properly (see ?"^")
# We do the same as before for the log transformation then. 


merge6 <- merge6 %>%
  mutate(fdi_current_cub = ifelse(fdi_current < 0, 
                                  ((-1*fdi_current)^(1/3))*(-1), 
                                  fdi_current^(1/3)),
         fdi_current_pc_cub = ifelse(fdi_current_pc < 0, 
                                     ((-1*fdi_current_pc)^(1/3))*(-1), 
                                     fdi_current_pc^(1/3)),
         fdi_gdp_cub = ifelse(fdi_gdp < 0, 
                              ((-1*fdi_gdp)^(1/3))*(-1), 
                              fdi_gdp^(1/3)),
         fdi_cpi_cub = ifelse(fdi_cpi < 0, 
                              ((-1*fdi_cpi)^(1/3))*(-1), 
                              fdi_cpi^(1/3)),
         fdi_cpi_pc_cub = ifelse(fdi_cpi_pc < 0, 
                                 ((-1*fdi_cpi_pc)^(1/3))*(-1), 
                                 fdi_cpi_pc^(1/3)))



# Let's compare the distribution of FDI logged and FDI cube root 

d1 <- density(merge6$fdi_cpi_pc_neg_log, 
              na.rm = T) # Constant FDI pc negative
plot(d1)


d2 <- density(merge6$fdi_cpi_pc_cub, 
              na.rm = T) # Constant FDI pc negative
plot(d2)


d3 <- density(merge6$fdi_current_pc_neg_log, 
              na.rm = T) # Current FDI pc negative
plot(d3)


d4 <- density(merge6$fdi_current_pc_cub, 
              na.rm = T) # Current FDI pc negative
plot(d4) 


d5 <- density(merge6$fdi_gdp_cub, 
              na.rm = T) # FDI % GDP
plot(d5)


d6 <- density(merge6$fdi_cpi_pc_pos_log, 
              na.rm = T)
plot(d6)


d7 <- density(merge6$fdi_current_pc_pos_log, 
              na.rm = T)
plot(d7)


# Let's compare now the distribution of ODA

d8 <- density(merge6$oda_constant_pc_neg_log,
              na.rm = T)
plot(d8)


d9 <- density(merge6$oda_constant_pc_pos_log, 
              na.rm = T)
plot(d9)


d10 <- density(merge6$oda_cpi_neg_log, 
               na.rm = T)
plot(d10)


d11 <- density(merge6$oda_current_pc_neg_log, 
               na.rm = T)
plot(d11)


d12 <- density(merge6$oda_current_pc_pos_log, 
               na.rm = T)
plot(d12)



# Create lags of each variable. 
# There are many variables that should be lagged, let's make a loop too make it faster.
# The following code is the loop:
# First, we create a vector of all the variables in the df.
# Second, some of the variables don't need to be lagged, so we eliminate them from the vector.
# Third, the loop using the "slide()" function lags all the variables we've chosen. 

vars <- names(merge6)

vars <- vars[!vars %in% c("cowcode", "year", "case_name", "case_id", "case_fail", 
                          "first_leader", "leader_name", "leader_id", "leader_fail", 
                          "pers_2pl", "pers_se_2pl", "partyexcom", "partyrbrstmp", 
                          "officepers", "createparty", "milnotrial", "milmerit", 
                          "paramil", "sectyapp", "region", "G_age", "country", 
                          "period1", "period2", "period3", "period4", "period5",
                          "period6", "period7", "period8", "period9", "period10", 
                          "period11", "period12", "coldwar", "civilwar_lag", "leader_duration", "leader_duration_log")]

for (v in 1:length(vars)) {
  merge6 <- slide(merge6,
                  Var = vars[v],
                  TimeVar = "year",
                  GroupVar = "case_id",
                  NewVar = paste(vars[v], "_lag"),
                  slideBy = -1) 
}


# The "_lag" in the varnames has a space, let's delete it #

names <- names(merge6)

names <- str_replace_all(names, " _lag", "_lag")

names(merge6) <- names



# Leader duration should be lagged by leader_id 

vars2 <- names(merge6)

vars2 <- vars2[vars2 %in% c("leader_duration", "leader_duration_log")]

for (v in 1:length(vars2)) {
  merge6 <- slide(merge6,
                  Var = vars2[v],
                  TimeVar = "year",
                  GroupVar = "leader_id",
                  NewVar = paste(vars2[v], "_lag"),
                  slideBy = -1) 
}


# The "_lag" in the varnames has a space, let's delete it #

names2 <- names(merge6)

names2 <- str_replace_all(names2, " _lag", "_lag")

names(merge6) <- names2



## Save final df --------------------------------------------------------------

data_final <- merge6

write.csv(data_final, "aid-fdi-personalism_NEW.csv")

save.dta13(data_final, "aid-fdi-personalism_NEW.dta")




## Cleaning ends here ---------------------------------------------------




# ANALYSES (MAIN PAPER) -------------------------------------------------


## DATA -------------------------------------------------------------------

data <- read.dta13("aid-fdi-personalism_NEW.dta")


# Split samples CW and Post-CW #

data_cw0 <- subset(data, coldwar != "1") # Post Cold War data

data_cw1 <- subset(data, coldwar != "0") # Cold War data


# Set dictionary for all regression tables #

setFixest_dict(c(oda_constant_pc_pos_log_lag = "ODA pc (log)",
                 fdi_cpi_pc_pos_log_lag = "FDI pc (log)",
                 gdppc_log_lag = "GDP pc (log)",
                 population_log_lag = "Population (log)",
                 oilpc_log_lag = "Oil rents pc (log)",
                 gdppc_growth_lag = "GDP pc growth",
                 leader_id = "Leader",
                 year = "Year",
                 personalism = "Personalism",
                 personalism_party = "Party personalism",
                 personalism_security = "Security personalism",
                 leader_duration_log_lag = "Leader duration (log)",
                 civilwar_lag = "Civil War",
                 partyexcom = "Party executive",
                 partyrbrstmp = "Rubber Stamp",
                 officepers = "Appointments",
                 createparty = "New party",
                 milnotrial = "Purges",
                 milmerit = "Promotions",
                 paramil = "Paramilitary",
                 sectyapp = "Security Apparatus"))



## MAIN MODELS AID ------------------------------------------------------

# 1960-2010 #


# M1: naive model #

aid_naive_m1 <- feols(fml = personalism ~ oda_constant_pc_pos_log_lag | leader_id + year,
                      data = data,
                      cluster = "leader_id",
                      panel.id = c("leader_id", "year"))

print(aid_naive_m1) 


# M2: controls #

aid_controls_m2 <- feols(fml = personalism ~ oda_constant_pc_pos_log_lag + 
                           gdppc_log_lag + 
                           gdppc_growth_lag + 
                           population_log_lag + 
                           oilpc_log_lag + 
                           civilwar_lag + 
                           leader_duration_log_lag | leader_id + year,
                         data = data,
                         cluster = "leader_id",
                         panel.id = c("leader_id", "year"))

print(aid_controls_m2) 



# Cold War #

# M3: naive #

aid_cw1_naive_m3 <- feols(fml = personalism ~ oda_constant_pc_pos_log_lag | leader_id + year,
                          data = data_cw1,
                          cluster = "leader_id",
                          panel.id = c("leader_id", "year"))


print(aid_cw1_naive_m3)



# M4: controls #

aid_cw1_controls_m4 <- feols(fml = personalism ~ oda_constant_pc_pos_log_lag + 
                               gdppc_log_lag + 
                               gdppc_growth_lag + 
                               population_log_lag + 
                               oilpc_log_lag + 
                               civilwar_lag + 
                               leader_duration_log_lag | leader_id + year,
                             data = data_cw1,
                             cluster = "leader_id",
                             panel.id = c("leader_id", "year"))

print(aid_cw1_controls_m4)



# Post Cold War #

# M5: naive #

aid_cw0_naive_m5 <- feols(fml = personalism ~ oda_constant_pc_pos_log_lag | leader_id + year,
                          data = data_cw0,
                          cluster = "leader_id",
                          panel.id = c("leader_id", "year"))

print(aid_cw0_naive_m5)



# M6: controls #

aid_cw0_controls_m6 <- feols(fml = personalism ~ oda_constant_pc_pos_log_lag + 
                               gdppc_log_lag + 
                               gdppc_growth_lag + 
                               population_log_lag + 
                               oilpc_log_lag + 
                               civilwar_lag + 
                               leader_duration_log_lag | leader_id + year,
                             data = data_cw0,
                             cluster = "leader_id",
                             panel.id = c("leader_id", "year"))

print(aid_cw0_controls_m6)


# Table 1: Foreign aid per capita and personalism, 1960-2010 #

# Note 1: that this function might change the sign of the coefficients when they are smaller that -0.0004 (aprox).
# Note 2: We then edit the output in Overleaf.


etable(aid_naive_m1, aid_controls_m2, 
       aid_cw1_naive_m3, aid_cw1_controls_m4,
       aid_cw0_naive_m5, aid_cw0_controls_m6,
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "Foreign aid per capita and personalism, 1960-2010")



# Export Table 1 (PNG)

etable(aid_naive_m1, aid_controls_m2, 
       aid_cw1_naive_m3, aid_cw1_controls_m4,
       aid_cw0_naive_m5, aid_cw0_controls_m6,
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "Foreign aid per capita and personalism, 1960-2010",
       export = "Table 1.png")



## MAIN MODELS FDI ------------------------------------------------------


# M7: naive #

fdi_naive_m7 <- feols(fml = personalism ~ fdi_cpi_pc_pos_log_lag | leader_id + year,
                      data = data,
                      cluster = "leader_id",
                      panel.id = c("leader_id", "year"))

print(fdi_naive_m7)


# M8: controls #

fdi_controls_m8 <- feols(fml = personalism ~ fdi_cpi_pc_pos_log_lag + 
                           gdppc_log_lag + 
                           gdppc_growth_lag + 
                           population_log_lag + 
                           oilpc_log_lag + 
                           civilwar_lag + 
                           leader_duration_log_lag | leader_id + year,
                         data = data,
                         cluster = "leader_id",
                         panel.id = c("leader_id", "year"))


print(fdi_controls_m8)



# Table 2: FDI per capita and personalism, 1970-2010 #


# Note 1: that this function might change the sign of the coefficients when they are smaller that -0.0004 (aprox).
# Note 2: We then edit the output in Overleaf.

etable(fdi_naive_m7, fdi_controls_m8,
       style.df = style.df(depvar.title = "",
                           fixef.title = "",
                           fixef.suffix = " FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "FDI per capita and personalism, 1970-2010")


# Export Table 2 (PNG)

etable(fdi_naive_m7, fdi_controls_m8,
       style.df = style.df(depvar.title = "",
                           fixef.title = "",
                           fixef.suffix = " FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "FDI per capita and personalism, 1970-2010",
       export = "Table 2.png")



## AID: LPM ------------------------------------------------------


aid_sectyapp_m9 <- feols(fml = sectyapp ~ oda_constant_pc_pos_log_lag + 
                           gdppc_log_lag + 
                           gdppc_growth_lag + 
                           population_log_lag + 
                           oilpc_log_lag + 
                           civilwar_lag + 
                           leader_duration_log_lag | leader_id + year,
                         data = data,
                         cluster = "leader_id",
                         panel.id = c("leader_id", "year"))


print(aid_sectyapp_m9)



aid_paramilitary_m10 <- feols(fml = paramil ~ oda_constant_pc_pos_log_lag + 
                                gdppc_log_lag + 
                                gdppc_growth_lag + 
                                population_log_lag + 
                                oilpc_log_lag + 
                                civilwar_lag + 
                                leader_duration_log_lag | leader_id + year,
                              data = data,
                              cluster = "leader_id",
                              panel.id = c("leader_id", "year"))

print(aid_paramilitary_m10)



aid_promotions_m11 <- feols(fml = milmerit ~ oda_constant_pc_pos_log_lag + 
                              gdppc_log_lag + 
                              gdppc_growth_lag + 
                              population_log_lag + 
                              oilpc_log_lag + 
                              civilwar_lag + 
                              leader_duration_log_lag | leader_id + year,
                            data = data,
                            cluster = "leader_id",
                            panel.id = c("leader_id", "year"))

print(aid_promotions_m11)


aid_purges_m12 <- feols(fml = milnotrial ~ oda_constant_pc_pos_log_lag + 
                          gdppc_log_lag + 
                          gdppc_growth_lag + 
                          population_log_lag + 
                          oilpc_log_lag + 
                          civilwar_lag + 
                          leader_duration_log_lag | leader_id + year,
                        data = data,
                        cluster = "leader_id",
                        panel.id = c("leader_id", "year"))


print(aid_purges_m12)



aid_newparty_m13 <- feols(fml = createparty ~ oda_constant_pc_pos_log_lag + 
                            gdppc_log_lag + 
                            gdppc_growth_lag + 
                            population_log_lag + 
                            oilpc_log_lag + 
                            civilwar_lag + 
                            leader_duration_log_lag | leader_id + year,
                          data = data,
                          cluster = "leader_id",
                          panel.id = c("leader_id", "year"))

print(aid_newparty_m13)



aid_appointments_m14 <- feols(fml = officepers ~ oda_constant_pc_pos_log_lag + 
                                gdppc_log_lag + 
                                gdppc_growth_lag + 
                                population_log_lag + 
                                oilpc_log_lag + 
                                civilwar_lag + 
                                leader_duration_log_lag | leader_id + year,
                              data = data,
                              cluster = "leader_id",
                              panel.id = c("leader_id", "year"))

print(aid_appointments_m14)



aid_rubberstamp_m15 <- feols(fml = partyrbrstmp ~ oda_constant_pc_pos_log_lag + 
                               gdppc_log_lag + 
                               gdppc_growth_lag + 
                               population_log_lag + 
                               oilpc_log_lag + 
                               civilwar_lag + 
                               leader_duration_log_lag | leader_id + year,
                             data = data,
                             cluster = "leader_id",
                             panel.id = c("leader_id", "year"))


print(aid_rubberstamp_m15)



aid_partyexecutive_m16 <- feols(fml = partyexcom ~ oda_constant_pc_pos_log_lag + 
                                  gdppc_log_lag + 
                                  gdppc_growth_lag + 
                                  population_log_lag + 
                                  oilpc_log_lag + 
                                  civilwar_lag + 
                                  leader_duration_log_lag | leader_id + year,
                                data = data,
                                cluster = "leader_id",
                                panel.id = c("leader_id", "year"))

print(aid_partyexecutive_m16)



# Table 3: Foreign aid per capita and dis-aggregated personalism indicators, LPM #


# Note 1: that this function might change the sign of the coefficients when they are smaller that -0.0004 (aprox).
# Note 2: We then edit the output in Overleaf.

etable(aid_sectyapp_m9, aid_paramilitary_m10, aid_promotions_m11, aid_purges_m12, aid_newparty_m13, aid_appointments_m14, aid_rubberstamp_m15, aid_partyexecutive_m16, 
       style.df = style.df(depvar.title = "",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "Foreign aid per capita and dis-aggregated personalism indicators, LPM")


# Export Table 3 (PNG)

etable(aid_sectyapp_m9, aid_paramilitary_m10, aid_promotions_m11, aid_purges_m12, aid_newparty_m13, aid_appointments_m14, aid_rubberstamp_m15, aid_partyexecutive_m16, 
       style.df = style.df(depvar.title = "",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "Foreign aid per capita and dis-aggregated personalism indicators, LPM",
       export = "Table 3.png")


## FDI: LPM ------------------------------------------------------


fdi_sectyapp_m17 <- feols(fml = sectyapp ~ fdi_cpi_pc_pos_log_lag + 
                            gdppc_log_lag + 
                            gdppc_growth_lag + 
                            population_log_lag + 
                            oilpc_log_lag + 
                            civilwar_lag + 
                            leader_duration_log_lag | leader_id + year,
                          data = data,
                          cluster = "leader_id",
                          panel.id = c("leader_id", "year"))

print(fdi_sectyapp_m17)



fdi_paramilitary_m18 <- feols(fml = paramil ~ fdi_cpi_pc_pos_log_lag + 
                                gdppc_log_lag + 
                                gdppc_growth_lag + 
                                population_log_lag + 
                                oilpc_log_lag + 
                                civilwar_lag + 
                                leader_duration_log_lag | leader_id + year,
                              data = data,
                              cluster = "leader_id",
                              panel.id = c("leader_id", "year"))

print(fdi_paramilitary_m18)




fdi_promotions_m19 <- feols(fml = milmerit ~ fdi_cpi_pc_pos_log_lag + 
                              gdppc_log_lag + 
                              gdppc_growth_lag + 
                              population_log_lag + 
                              oilpc_log_lag + 
                              civilwar_lag + 
                              leader_duration_log_lag | leader_id + year,
                            data = data,
                            cluster = "leader_id",
                            panel.id = c("leader_id", "year"))

print(fdi_promotions_m19)



fdi_purges_m20 <- feols(fml = milnotrial ~ fdi_cpi_pc_pos_log_lag + 
                          gdppc_log_lag + 
                          gdppc_growth_lag + 
                          population_log_lag + 
                          oilpc_log_lag + 
                          civilwar_lag + 
                          leader_duration_log_lag | leader_id + year,
                        data = data,
                        cluster = "leader_id",
                        panel.id = c("leader_id", "year"))


print(fdi_purges_m20)



fdi_newparty_m21 <- feols(fml = createparty ~ fdi_cpi_pc_pos_log_lag + 
                            gdppc_log_lag + 
                            gdppc_growth_lag + 
                            population_log_lag + 
                            oilpc_log_lag + 
                            civilwar_lag + 
                            leader_duration_log_lag | leader_id + year,
                          data = data,
                          cluster = "leader_id",
                          panel.id = c("leader_id", "year"))

print(fdi_newparty_m21)



fdi_appointments_m22 <- feols(fml = officepers ~ fdi_cpi_pc_pos_log_lag + 
                                gdppc_log_lag + 
                                gdppc_growth_lag + 
                                population_log_lag + 
                                oilpc_log_lag + 
                                civilwar_lag + 
                                leader_duration_log_lag | leader_id + year,
                              data = data,
                              cluster = "leader_id",
                              panel.id = c("leader_id", "year"))

print(fdi_appointments_m22)



fdi_rubberstamp_m23 <- feols(fml = partyrbrstmp ~ fdi_cpi_pc_pos_log_lag + 
                               gdppc_log_lag + 
                               gdppc_growth_lag + 
                               population_log_lag + 
                               oilpc_log_lag + 
                               civilwar_lag + 
                               leader_duration_log_lag | leader_id + year,
                             data = data,
                             cluster = "leader_id",
                             panel.id = c("leader_id", "year"))


print(fdi_rubberstamp_m23)



fdi_partyexecutive_m24 <- feols(fml = partyexcom ~ fdi_cpi_pc_pos_log_lag + 
                                  gdppc_log_lag + 
                                  gdppc_growth_lag + 
                                  population_log_lag + 
                                  oilpc_log_lag + 
                                  civilwar_lag + 
                                  leader_duration_log_lag | leader_id + year,
                                data = data,
                                cluster = "leader_id",
                                panel.id = c("leader_id", "year"))


print(fdi_partyexecutive_m24)



# Table 4: FDI per capita and dis-aggregated personalism indicators, LPMs #

# Note 1: that this function might change the sign of the coefficients when they are smaller that -0.0004 (aprox).
# Note 2: We then edit the output in Overleaf.

etable(fdi_sectyapp_m17, fdi_paramilitary_m18, fdi_promotions_m19, fdi_purges_m20, fdi_newparty_m21, fdi_appointments_m22, fdi_rubberstamp_m23, fdi_partyexecutive_m24, 
       style.df = style.df(depvar.title = "",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "FDI per capita and dis-aggregated personalism indicators, LPMs")


# Export Table 4 (PNG)

etable(fdi_sectyapp_m17, fdi_paramilitary_m18, fdi_promotions_m19, fdi_purges_m20, fdi_newparty_m21, fdi_appointments_m22, fdi_rubberstamp_m23, fdi_partyexecutive_m24, 
       style.df = style.df(depvar.title = "",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "FDI per capita and dis-aggregated personalism indicators, LPMs",
       export = "Table 4.png")



## SECURITY AND PARTY PERSONALISM --------------------------------------------

# We also analyze the effect of aid and FDI using the Security personalization and Party personalization indexes from
# Chin, Song and Wright (BJPS, 2023). We replicate their .do file in order to get the new indexes. 
# The .do files can be accessed in the original replication materials, we do not add it in our materials.


chindata <- read.dta13("Party and security personalism.dta")

data2 <- merge(data, chindata, by = c("cowcode","year"), all.x = T)

data2 <- rename(data2,
                personalism_chin = xpers,
                personalism_party = xpers1,
                personalism_security = xpers2)



## AID ------------------------------------------------------------

aid_securitypers_naive_m25 <- feols(fml = personalism_security ~ oda_constant_pc_pos_log_lag | leader_id + year,
                                    data = data2,
                                    cluster = "leader_id",
                                    panel.id = c("leader_id", "year"))


print(aid_securitypers_naive_m25)



aid_securitypers_controls_m26 <- feols(fml = personalism_security ~ oda_constant_pc_pos_log_lag + 
                                         gdppc_log_lag + 
                                         gdppc_growth_lag + 
                                         population_log_lag + 
                                         oilpc_log_lag + 
                                         civilwar_lag + 
                                         leader_duration_log_lag | leader_id + year,
                                       data = data2,
                                       cluster = "leader_id",
                                       panel.id = c("leader_id", "year"))

print(aid_securitypers_controls_m26)



aid_partypers_naive_m29 <- feols(fml = personalism_party ~ oda_constant_pc_pos_log_lag | leader_id + year,
                                 data = data2,
                                 cluster = "leader_id",
                                 panel.id = c("leader_id", "year"))

print(aid_partypers_naive_m29)



aid_partypers_controls_m30 <- feols(fml = personalism_party ~ oda_constant_pc_pos_log_lag +
                                      gdppc_log_lag + 
                                      gdppc_growth_lag + 
                                      population_log_lag + 
                                      oilpc_log_lag + 
                                      civilwar_lag + 
                                      leader_duration_log_lag | leader_id + year,
                                    data = data2,
                                    cluster = "leader_id",
                                    panel.id = c("leader_id", "year"))

print(aid_partypers_controls_m30)



## FDI ------------------------------------------------------  


fdi_securitypers_naive_m27 <- feols(fml = personalism_security ~ fdi_cpi_pc_pos_log_lag | leader_id + year,
                                    data = data2,
                                    cluster = "leader_id",
                                    panel.id = c("leader_id", "year"))

print(fdi_securitypers_naive_m27)



fdi_securitypers_controls_m28 <- feols(fml = personalism_security ~ fdi_cpi_pc_pos_log_lag + 
                                         gdppc_log_lag + 
                                         gdppc_growth_lag +
                                         population_log_lag + 
                                         oilpc_log_lag + 
                                         civilwar_lag + 
                                         leader_duration_log_lag | leader_id + year,
                                       data = data2,
                                       cluster = "leader_id",
                                       panel.id = c("leader_id", "year"))

print(fdi_securitypers_controls_m28)



fdi_partypers_naive_m31 <- feols(fml = personalism_party ~ fdi_cpi_pc_pos_log_lag | leader_id + year,
                                 data = data2,
                                 cluster = "leader_id",
                                 panel.id = c("leader_id", "year"))

print(fdi_partypers_naive_m31)


fdi_partypers_controls_m32 <- feols(fml = personalism_party ~ fdi_cpi_pc_pos_log_lag + 
                                      gdppc_log_lag + 
                                      gdppc_growth_lag + 
                                      population_log_lag + 
                                      oilpc_log_lag + 
                                      civilwar_lag + 
                                      leader_duration_log_lag | leader_id + year,
                                    data = data2,
                                    cluster = "leader_id",
                                    panel.id = c("leader_id", "year"))

print(fdi_partypers_controls_m32)



# Table 5: Foreign aid per capita, FDI per capita, and security and party personalism # 


# Note 1: that this function might change the sign of the coefficients when they are smaller that -0.0004 (aprox).
# Note 2: We then edit the output in Overleaf.


etable(aid_securitypers_naive_m25, aid_securitypers_controls_m26,
       fdi_securitypers_naive_m27, fdi_securitypers_controls_m28,
       aid_partypers_naive_m29, aid_partypers_controls_m30,
       fdi_partypers_naive_m31, fdi_partypers_controls_m32,
       style.df = style.df(depvar.title = "",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "Foreign aid per capita, FDI per capita, and security and party personalism")


# Export Table 5 (PNG)

etable(aid_securitypers_naive_m25, aid_securitypers_controls_m26,
       fdi_securitypers_naive_m27, fdi_securitypers_controls_m28,
       aid_partypers_naive_m29, aid_partypers_controls_m30,
       fdi_partypers_naive_m31, fdi_partypers_controls_m32,
       style.df = style.df(depvar.title = "",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "Foreign aid per capita, FDI per capita, and security and party personalism",
       export = "Table 5.png")



## Analyses main paper ends here --------------------------------------------


# APPENDIX -----------------------------------------------------------------


## Data -------------------------------------------------------------------

# Set dictionary for all regression tables #

setFixest_dict(c(oda_constant_pc_pos_log_lag = "ODA pc (log)", 
                 oda_constant_pc_neg_log_lag = "ODA pc, incl. neg. (log)",
                 oda_current_pc_pos_log_lag = "ODA pc, current USD (log)",
                 oda_current_pc_neg_log_lag = "ODA pc, current USD, incl. neg. (log)",
                 oda_aiddata_pc_log_lag = "ODA pc, AidData Project (log)",
                 eco_aid_pc_log_lag = "Economic aid pc (log)",
                 demo_aid_pc_log_lag = "Democracy aid pc (log)",
                 fdi_cpi_pc_pos_log_lag = "FDI pc (log)",
                 fdi_cpi_pc_neg_log_lag = "FDI pc, incl. neg. (log)",
                 fdi_current_pc_pos_log_lag = "FDI pc, current USD (log)",
                 fdi_current_pc_neg_log_lag = "FDI pc, current USD, incl. neg. (log)",
                 fdi_cpi_pc_cub_lag = "FDI pc (cube root)",
                 fdi_gdp_cub_lag = "FDI, of GDP (cube root)",
                 gdppc_log_lag = "GDP pc (log)",
                 population_log_lag = "Population (log)",
                 oilpc_log_lag = "Oil pc (log)",
                 gdppc_growth_lag = "Growth pc",
                 leader_duration_log_lag = "Leader duration (log)",
                 civilwar_lag = "Civil War",
                 leader_id = "Leader",
                 personalism = "Personalism",
                 year = "Year",
                 time_trend_fdi = "Global time trend",
                 time_trend_fdi2 = "Global time trend$^2$",
                 time_trend_fdi3 = "Global time trend$^3$",
                 time_trend_aid = "Global time trend",
                 time_trend_aid2 = "Global time trend$^2$",
                 time_trend_aid3 = "Global time trend$^3$"))



## Summary statistics ------------------------------------------------------

# Table A1 has been created in STATA. 
# See "Summary stats.do" file.


## Robustness checks: Foreign Aid----------------------------------------------

### Aid: Negative values ----------------------------------------------------  

# These models include the negative values of ODA #


# 1960-2010

aid_negative_m1 <- feols(fml = personalism ~ oda_constant_pc_neg_log_lag +
                           gdppc_log_lag + 
                           gdppc_growth_lag + 
                           population_log_lag + 
                           oilpc_log_lag + 
                           civilwar_lag + 
                           leader_duration_log_lag | leader_id + year,
                         data = data,
                         cluster = "leader_id",
                         panel.id = c("leader_id", "year"))

print(aid_negative_m1)


# Cold War #


aid_negative_cw1_m2 <- feols(fml = personalism ~ oda_constant_pc_neg_log_lag + 
                               gdppc_log_lag + 
                               gdppc_growth_lag + 
                               population_log_lag + 
                               oilpc_log_lag + 
                               civilwar_lag + 
                               leader_duration_log_lag | leader_id + year,
                             data = data_cw1,
                             cluster = "leader_id",
                             panel.id = c("leader_id", "year"))

print(aid_negative_cw1_m2)


# Post Cold War #


aid_negative_cw0_m3 <- feols(fml = personalism ~ oda_constant_pc_neg_log_lag + 
                               gdppc_log_lag + 
                               gdppc_growth_lag + 
                               population_log_lag + 
                               oilpc_log_lag + 
                               civilwar_lag + 
                               leader_duration_log_lag | leader_id + year,
                             data = data_cw0,
                             cluster = "leader_id",
                             panel.id = c("leader_id", "year"))

print(aid_negative_cw0_m3)



# Table A2: Foreign aid per capita and personalism, 1960-2010. Including negative values #

# Note 1: that this function might change the sign of the coefficients when they are smaller that -0.0004 (aprox).
# Note 2: We then edit the output in Overleaf.

etable(aid_negative_m1, 
       aid_negative_cw1_m2, 
       aid_negative_cw0_m3, 
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "Foreign aid per capita and personalism, 1960-2010. Including negative values")


# Export Table A2 (PNG)

etable(aid_negative_m1, 
       aid_negative_cw1_m2, 
       aid_negative_cw0_m3, 
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "Foreign aid per capita and personalism, 1960-2010. Including negative values",
       export = "Table A2.png")




### Aid: Modelling time (polynomials and 1WFE) --------------------------------


# We create first the time trend for aid and the polynomials #

data_aid_type <- data[!(data$year < 1960),]

data_aid_type <- data_aid_type %>%
  mutate(time_trend_aid = case_when(year == 1960 ~ 0,
                                    year == 1961 ~ 1,
                                    year == 1962 ~ 2,
                                    year == 1963 ~ 3, 
                                    year == 1964 ~ 4,
                                    year == 1965 ~ 5,
                                    year == 1966 ~ 6,
                                    year == 1967 ~ 7,
                                    year == 1968 ~ 8,
                                    year == 1969 ~ 9,
                                    year == 1970 ~ 10,
                                    year == 1971 ~ 11,
                                    year == 1972 ~ 12,
                                    year == 1973 ~ 13,
                                    year == 1974 ~ 14,
                                    year == 1975 ~ 15,
                                    year == 1976 ~ 16,
                                    year == 1977 ~ 17,
                                    year == 1978 ~ 18,
                                    year == 1979 ~ 19,
                                    year == 1980 ~ 20,
                                    year == 1981 ~ 21,
                                    year == 1982 ~ 22,
                                    year == 1983 ~ 23,
                                    year == 1984 ~ 24,
                                    year == 1985 ~ 25,
                                    year == 1986 ~ 26,
                                    year == 1987 ~ 27,
                                    year == 1988 ~ 28,
                                    year == 1989 ~ 29,
                                    year == 1990 ~ 30,
                                    year == 1991 ~ 31,
                                    year == 1992 ~ 32,
                                    year == 1993 ~ 33,
                                    year == 1994 ~ 34,
                                    year == 1995 ~ 35,
                                    year == 1996 ~ 36,
                                    year == 1997 ~ 37,
                                    year == 1998 ~ 38,
                                    year == 1999 ~ 39,
                                    year == 2000 ~ 40,
                                    year == 2001 ~ 41,
                                    year == 2002 ~ 42,
                                    year == 2003 ~ 43,
                                    year == 2004 ~ 44,
                                    year == 2005 ~ 45,
                                    year == 2006 ~ 46,
                                    year == 2007 ~ 47,
                                    year == 2008 ~ 48,
                                    year == 2009 ~ 49,
                                    year == 2010 ~ 50),
         time_trend_aid2 = time_trend_aid^2,
         time_trend_aid3 = time_trend_aid^3)



aid_poly_naive_m1 <- feols(fml = personalism ~ oda_constant_pc_pos_log_lag + 
                             time_trend_aid + 
                             time_trend_aid2 + 
                             time_trend_aid3 | leader_id,
                           data = data_aid_type,
                           cluster = "leader_id",
                           panel.id = c("leader_id", "year"))

print(aid_poly_naive_m1)


aid_poly_controls_m2 <- feols(fml = personalism ~ oda_constant_pc_pos_log_lag +
                                gdppc_log_lag + 
                                gdppc_growth_lag + 
                                population_log_lag + 
                                oilpc_log_lag + 
                                civilwar_lag + 
                                leader_duration_log_lag + 
                                time_trend_aid + 
                                time_trend_aid2 + 
                                time_trend_aid3   | leader_id,
                              data = data_aid_type,
                              cluster = "leader_id",
                              panel.id = c("leader_id", "year"))

print(aid_poly_controls_m2)




aid_owfe_naive_m3 <- feols(fml = personalism ~ oda_constant_pc_pos_log_lag | leader_id,
                           data = data,
                           cluster = "leader_id",
                           panel.id = c("leader_id", "year"))

print(aid_owfe_naive_m3)


aid_owfe_controls_m4 <- feols(fml = personalism ~ oda_constant_pc_pos_log_lag + 
                                gdppc_log_lag + 
                                gdppc_growth_lag + 
                                population_log_lag + 
                                oilpc_log_lag + 
                                civilwar_lag + 
                                leader_duration_log_lag | leader_id,
                              data = data,
                              cluster = "leader_id",
                              panel.id = c("leader_id", "year"))


print(aid_owfe_controls_m4)



# Table A3: Foreign aid per capita and personalism, 1960-2010. Modelling time #

# Note 1: that this function might change the sign of the coefficients when they are smaller that -0.0004 (aprox).
# Note 2: We then edit the output in Overleaf.

etable(aid_poly_naive_m1, aid_poly_controls_m2,
       aid_owfe_naive_m3, aid_owfe_controls_m4,
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "Foreign aid per capita and personalism, 1960-2010. Modelling time")


# Export Table A3 (PNG)

# Note 1: that this function might change the sign of the coefficients when they are smaller that -0.0004 (aprox).
# Note 2: We then edit the output in Overleaf.

etable(aid_poly_naive_m1, aid_poly_controls_m2,
       aid_owfe_naive_m3, aid_owfe_controls_m4,
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "Foreign aid per capita and personalism, 1960-2010. Modelling time",
       export = "Table A3.png")



### Aid: current USD --------------------------------------------------------

# 1960-2010 #


aid_current_m1 <- feols(fml = personalism ~ oda_current_pc_pos_log_lag + 
                          gdppc_log_lag + 
                          gdppc_growth_lag + 
                          population_log_lag + 
                          oilpc_log_lag + 
                          civilwar_lag + 
                          leader_duration_log_lag | leader_id + year,
                        data = data,
                        cluster = "leader_id",
                        panel.id = c("leader_id", "year"))

print(aid_current_m1)


# Cold War #

aid_current_cw1_m2 <- feols(fml = personalism ~ oda_current_pc_pos_log_lag + 
                              gdppc_log_lag + 
                              gdppc_growth_lag + 
                              population_log_lag + 
                              oilpc_log_lag + 
                              civilwar_lag + 
                              leader_duration_log_lag | leader_id + year,
                            data = data_cw1,
                            cluster = "leader_id",
                            panel.id = c("leader_id", "year"))

print(aid_current_cw1_m2)


# Post Cold War #


aid_current_cw0_m3 <- feols(fml = personalism ~ oda_current_pc_pos_log_lag + 
                              gdppc_log_lag + 
                              gdppc_growth_lag + 
                              population_log_lag + 
                              oilpc_log_lag + 
                              civilwar_lag + 
                              leader_duration_log_lag | leader_id + year,
                            data = data_cw0,
                            cluster = "leader_id",
                            panel.id = c("leader_id", "year"))

print(aid_current_cw0_m3)



# Table A4: Foreign aid per capita and personalism, 1960-2010. Aid in current USD #

# Note 1: that this function might change the sign of the coefficients when they are smaller that -0.0004 (aprox).
# Note 2: We then edit the output in Overleaf.


etable(aid_current_m1,  
       aid_current_cw1_m2, 
       aid_current_cw0_m3, 
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "Foreign aid per capita and personalism, 1960-2010. Aid in current USD")


# Export Table A4 (PNG)

etable(aid_current_m1,  
       aid_current_cw1_m2, 
       aid_current_cw0_m3, 
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "Foreign aid per capita and personalism, 1960-2010. Aid in current USD",
       export = "Table A4.png")





### Aid: current USD including negative values of aid ------------------------


# 1960-2010 #


aid_current_negative_m1 <- feols(fml = personalism ~ oda_current_pc_neg_log_lag + 
                                   gdppc_log_lag + 
                                   gdppc_growth_lag + 
                                   population_log_lag + 
                                   oilpc_log_lag + 
                                   civilwar_lag + 
                                   leader_duration_log_lag | leader_id + year,
                                 data = data,
                                 cluster = "leader_id",
                                 panel.id = c("leader_id", "year"))

print(aid_current_negative_m1)


# Cold War #

aid_current_negative_cw1_m2 <- feols(fml = personalism ~ oda_current_pc_neg_log_lag + 
                                       gdppc_log_lag + 
                                       gdppc_growth_lag + 
                                       population_log_lag + 
                                       oilpc_log_lag + 
                                       civilwar_lag + 
                                       leader_duration_log_lag | leader_id + year,
                                     data = data_cw1,
                                     cluster = "leader_id",
                                     panel.id = c("leader_id", "year"))

print(aid_current_negative_cw1_m2) 


# Post Cold War #


aid_current_negative_cw0_m3 <- feols(fml = personalism ~ oda_current_pc_neg_log_lag + 
                                       gdppc_log_lag + 
                                       gdppc_growth_lag + 
                                       population_log_lag + 
                                       oilpc_log_lag + 
                                       civilwar_lag + 
                                       leader_duration_log_lag | leader_id + year,
                                     data = data_cw0,
                                     cluster = "leader_id",
                                     panel.id = c("leader_id", "year"))

print(aid_current_negative_cw0_m3)




# Table A5: Foreign aid per capita and personalism, 1960-2010. Aid in current USD, including negative values #

# Note 1: that this function might change the sign of the coefficients when they are smaller that -0.0004 (aprox).
# Note 2: We then edit the output in Overleaf.


etable(aid_current_negative_m1, 
       aid_current_negative_cw1_m2, 
       aid_current_negative_cw0_m3, 
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "Foreign aid per capita and personalism, 1960-2010. Aid in current USD, including negative values")


# Export table A5 (PNG)

etable(aid_current_negative_m1, 
       aid_current_negative_cw1_m2, 
       aid_current_negative_cw0_m3, 
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "Foreign aid per capita and personalism, 1960-2010. Aid in current USD, including negative values",
       export = "Table A5.png")



### Aid: AidData Project ----------------------------------------------------


# Comparison plot ODA WDI and ODA AidData Project (Figure A1)

subdata <- data %>%
  group_by(year) %>%
  summarise(oda_wdi = sum(oda_constant_pc_pos_log_lag, na.rm = T),
            oda_aiddata = sum(oda_aiddata_pc_log, na.rm = T))

subdata <- subdata[-(1:14),]

col <- c("#666666", "#CCCCCC")

figure_a1 <- ggplot(subdata, 
                    aes(x = year)) +
  geom_area(aes(y = oda_wdi,
                fill = "WDI data")) + 
  geom_area(aes(y = oda_aiddata,
                fill = "AidData Project data")) +
  scale_y_continuous(name = "Total ODA per capita in cons USD (log)") +
  scale_x_continuous(breaks = seq(1955, 2010, by = 5)) +
  scale_fill_manual(values = col) +
  labs(x = "Year") + 
  theme_bw() +
  theme(axis.text = element_text(size = 25),
        axis.title = element_text(size = 25),
        legend.text = element_text(size = 25),
        legend.title = element_blank(),
        legend.position =  "top")

figure_a1

ggsave(figure_a1, file = "Figure A1.pdf", width = 20, height = 10)


# 1960-2010 #


aid_aidata_m1 <- feols(fml = personalism ~ oda_aiddata_pc_log_lag + 
                         gdppc_log_lag + 
                         gdppc_growth_lag + 
                         population_log_lag + 
                         oilpc_log_lag + 
                         civilwar_lag + 
                         leader_duration_log_lag | leader_id + year,
                       data = data,
                       cluster = "leader_id",
                       panel.id = c("leader_id", "year"))

print(aid_aidata_m1)


# COLD WAR #

aid_aiddata_cw1_m2 <- feols(fml = personalism ~ oda_aiddata_pc_log_lag + 
                              gdppc_log_lag + 
                              gdppc_growth_lag + 
                              population_log_lag + 
                              oilpc_log_lag + 
                              civilwar_lag + 
                              leader_duration_log_lag | leader_id + year,
                            data = data_cw1,
                            cluster = "leader_id",
                            panel.id = c("leader_id", "year"))


print(aid_aiddata_cw1_m2)


# Post Cold War #


aid_aidata_cw0_m3 <- feols(fml = personalism ~ oda_aiddata_pc_log_lag + 
                             gdppc_log_lag + 
                             gdppc_growth_lag + 
                             population_log_lag + 
                             oilpc_log_lag + 
                             civilwar_lag + 
                             leader_duration_log_lag | leader_id + year,
                           data = data_cw0,
                           cluster = "leader_id",
                           panel.id = c("leader_id", "year"))

print(aid_aidata_cw0_m3)


# Table A6: Foreign aid per capita and personalism, 1960-2010. AidData Project Data #

# Note 1: that this function might change the sign of the coefficients when they are smaller that -0.0004 (aprox).
# Note 2: We then edit the output in Overleaf.


etable(aid_aidata_m1, aid_aiddata_cw1_m2, aid_aidata_cw0_m3,
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "Foreign aid per capita and personalism, 1960-2010. AidData Project Data")


# Export Table A6 (PNG)

etable(aid_aidata_m1, aid_aiddata_cw1_m2, aid_aidata_cw0_m3,
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "Foreign aid per capita and personalism, 1960-2010. AidData Project Data",
       export = "Table A6.png")



### Aid: Economic and democracy aid -----------------------------------------


# Figure A2 #

data_aid_type <- data %>%
  group_by(year) %>%
  summarise(eco_aid = sum(eco_aid_log, na.rm = T),
            demo_aid = sum(demo_aid_log, na.rm = T),
            eco_aid_pc = sum(eco_aid_pc_log, na.rm = T),
            demo_aid_pc = sum(demo_aid_pc_log, na.rm = T))


figure_a2_a <- ggplot(data_aid_type, 
                      aes(x = year)) +
  geom_col(aes(y = eco_aid, 
               fill = "Economic aid (log)"),
           size = 1) +
  geom_line(aes(y = eco_aid_pc*6, 
                color = "Economic aid pc (log)"),
            size = 1) +
  scale_fill_manual(values = c("Economic aid (log)" = "#969696")) +
  scale_color_manual(values = c("Economic aid pc (log)" = "black")) + 
  labs(color = "Type of aid") +
  geom_vline(xintercept = data_aid_type$year[46], 
             linetype = "dashed",
             color = "black")+
  geom_text(label = "Post Cold War",
            x = 1998,
            y = 1825,
            size = 6,
            color = "black") +
  geom_text(label = "Cold War",
            x = 1986,
            y = 1825,
            size = 6,
            color = "black") +
  labs(x = "Year", 
       y = "Aid (log)", 
       color = " ",
       title = "Economic aid (1946-2010)") +
  scale_x_continuous(breaks = seq(1946, 2010, by = 4)) +
  scale_y_continuous(name = "Economic aid (log)",
                     sec.axis = sec_axis( trans=~./6, name="Economic aid per capita (log)")) +
  theme_bw() +
  theme(axis.text.x = element_text(size = 15),
        axis.title.x = element_text(size = 22),
        axis.title.y = element_text(size = 20),
        axis.text.y = element_text(size = 22),
        plot.title = element_text(size = 20, 
                                  hjust = 0.5, 
                                  face = "bold"),
        legend.text = element_text(size = 12),
        legend.title = element_blank()) 

figure_a2_a

ggsave(figure_a2_a, file = "Figure A2_a.pdf", width = 20, height = 10)


figure_a2_b <- ggplot(data_aid_type, 
                      aes(x = year)) +
  geom_col(aes(y = demo_aid, 
               fill = "Democracy aid (log)"),
           size = 1) +
  geom_line(aes(y = demo_aid_pc*18, 
                color = "Democracy aid pc (log)"),
            size = 1) +
  scale_fill_manual(values = c("Democracy aid (log)" = "#969696")) +
  scale_color_manual(values = c("Democracy aid pc (log)" = "black")) + 
  labs(color = "Type of aid") +
  geom_vline(xintercept = data_aid_type$year[46], 
             linetype = "dashed",
             color = "black")+
  geom_text(label = "Post Cold War",
            x = 1998,
            y = 100,
            size = 6,
            color = "black") +
  geom_text(label = "Cold War",
            x = 1986,
            y = 100,
            size = 6,
            color = "black") +
  labs(x = "Year", 
       y = "Aid (log)", 
       color = " ",
       title = "Democracy aid (1946-2010)") +
  scale_x_continuous(breaks = seq(1946, 2010, by = 4)) +
  scale_y_continuous(name = "Democracy aid (log)",
                     sec.axis = sec_axis( trans=~./18, name="Democracy aid per capita (log)")) +
  theme_bw() +
  theme(axis.text.x = element_text(size = 15),
        axis.title.x = element_text(size = 22),
        axis.title.y = element_text(size = 20),
        axis.text.y = element_text(size = 22),
        plot.title = element_text(size = 20, 
                                  hjust = 0.5, 
                                  face = "bold"),
        legend.text = element_text(size = 12),
        legend.title = element_blank())

figure_a2_b

ggsave(figure_a2_b, file = "Figure A2_b.pdf", width = 20, height = 10)



# Economic aid: 1946-2010 #


aid_economic_m1 <- feols(fml = personalism ~ eco_aid_pc_log_lag + 
                           gdppc_log_lag + 
                           gdppc_growth_lag + 
                           population_log_lag + 
                           oilpc_log_lag + 
                           civilwar_lag + 
                           leader_duration_log_lag | leader_id + year,
                         data = data,
                         cluster = "leader_id",
                         panel.id = c("leader_id", "year"))

print(aid_economic_m1)


# Economic aid: Cold War #

aid_economic_cw1_m3 <- feols(fml = personalism ~ eco_aid_pc_log_lag + 
                               gdppc_log_lag + 
                               gdppc_growth_lag + 
                               population_log_lag + 
                               oilpc_log_lag + 
                               civilwar_lag + 
                               leader_duration_log_lag | leader_id + year,
                             data = data_cw1,
                             cluster = "leader_id",
                             panel.id = c("leader_id", "year"))

print(aid_economic_cw1_m3)


# Economic aid: Post Cold War #

aid_economic_cw0_m5 <- feols(fml = personalism ~ eco_aid_pc_log_lag + 
                               gdppc_log_lag + 
                               gdppc_growth_lag + 
                               population_log_lag + 
                               oilpc_log_lag + 
                               civilwar_lag + 
                               leader_duration_log_lag | leader_id + year,
                             data = data_cw0,
                             cluster = "leader_id",
                             panel.id = c("leader_id", "year"))

print(aid_economic_cw0_m5)


# Democracy aid: 1946-2010 #


aid_democracy_m2 <- feols(fml = personalism ~ demo_aid_pc_log_lag + 
                            gdppc_log_lag + 
                            gdppc_growth_lag + 
                            population_log_lag + 
                            oilpc_log_lag + 
                            civilwar_lag + 
                            leader_duration_log_lag | leader_id + year,
                          data = data,
                          cluster = "leader_id",
                          panel.id = c("leader_id", "year"))

print(aid_democracy_m2)


# Democracy aid: Cold War #


aid_democracy_cw1_m4 <- feols(fml = personalism ~ demo_aid_pc_log_lag +
                                gdppc_log_lag + 
                                gdppc_growth_lag + 
                                population_log_lag + 
                                oilpc_log_lag + 
                                civilwar_lag + 
                                leader_duration_log_lag | leader_id + year,
                              data = data_cw1,
                              cluster = "leader_id",
                              panel.id = c("leader_id", "year"))

print(aid_democracy_cw1_m4)


# Democracy aid: Post Cold War #


aid_democracy_cw0_m6 <- feols(fml = personalism ~ demo_aid_pc_log_lag + 
                                gdppc_log_lag + 
                                gdppc_growth_lag + 
                                population_log_lag + 
                                oilpc_log_lag + 
                                civilwar_lag + 
                                leader_duration_log_lag | leader_id + year,
                              data = data_cw0,
                              cluster = "leader_id",
                              panel.id = c("leader_id", "year"))

print(aid_democracy_cw0_m6)



# Table A7: Economic and democracy aid per capita and personalism, 1946-2010 #

# Note 1: that this function might change the sign of the coefficients when they are smaller that -0.0004 (aprox).
# Note 2: We then edit the output in Overleaf.

etable(aid_economic_m1, aid_democracy_m2,
       aid_economic_cw1_m3, aid_democracy_cw1_m4,
       aid_economic_cw0_m5, aid_democracy_cw0_m6,
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "Economic and democracy aid per capita and personalism, 1946-2010")


# Export Table A7 (PNG)

etable(aid_economic_m1, aid_democracy_m2,
       aid_economic_cw1_m3, aid_democracy_cw1_m4,
       aid_economic_cw0_m5, aid_democracy_cw0_m6,
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "Economic and democracy aid per capita and personalism, 1946-2010",
       export = "Table A7.png")




### Aid: Mediation analysis (Table A8) --------------------------------------

# Even though we set a seed at the beginning of the script, the results might vary slightly between runs.

# Fit models for mediation analysis

# Military spendig = Mediator
# Aid pc = Treatment
# Paramilitary = outcome


model.m <- lm(milex_cpi_pc_log ~ oda_constant_pc_pos_log_lag + 
                gdppc_log_lag + 
                gdppc_growth_lag + 
                population_log_lag + 
                oilpc_log_lag + 
                civilwar_lag + 
                leader_duration_log_lag + 
                factor(leader_id) + 
                factor(year),
              data = data)

print(model.m)


model.y <- lm(paramil ~ oda_constant_pc_pos_log_lag + 
                milex_cpi_pc_log +
                gdppc_log_lag + 
                gdppc_growth_lag + 
                population_log_lag + 
                oilpc_log_lag + 
                civilwar_lag + 
                leader_duration_log_lag + 
                factor(leader_id) + 
                factor(year),
              data = data)


print(model.y)

# Compute cluster-robust standard errors

robust_se_m <- vcovCR(model.m, cluster = data$leader_id, type = "CR2")
robust_se_y <- vcovCR(model.y, cluster = data$leader_id, type = "CR2")


# Summarize models with robust standard errors

summary_m <- coeftest(model.m, vcov = robust_se_m)
summary_y <- coeftest(model.y, vcov = robust_se_y)

print(summary_m)
print(summary_y)


# Perform mediation analysis

model.out <- mediate(model.m,
                     model.y,
                     treat = "oda_constant_pc_pos_log_lag",
                     mediator = "milex_cpi_pc_log",
                     boot = F,
                     sims = 2500)


# Summary of the mediation analysis (Table A8)

summary(model.out)

# Table A8 in main paper has been edited manually in Overleaf according to the output from summary(model.out)

# Export Table A8 as HTML file (it doesn't export other formats)

modelsummary(model.out, stars = T, output = "Table A8.html")


### Aid: Mediation analysis (Table A9) --------------------------------------

# Fit models using lm

# Military spendig = Mediator
# Aid pc = Treatment
# Security Personalism = outcome

model.m5 <- lm(milex_cpi_pc_log ~ oda_constant_pc_pos_log_lag + 
                 gdppc_log_lag + 
                 gdppc_growth_lag + 
                 population_log_lag + 
                 oilpc_log_lag + 
                 civilwar_lag + 
                 leader_duration_log_lag + 
                 factor(leader_id) + 
                 factor(year),
               data = data2)

print(model.m5)


model.y5 <- lm(personalism_security ~ oda_constant_pc_pos_log_lag + 
                 milex_cpi_pc_log +
                 gdppc_log_lag + 
                 gdppc_growth_lag + 
                 population_log_lag + 
                 oilpc_log_lag + 
                 civilwar_lag + 
                 leader_duration_log_lag + 
                 factor(leader_id) + 
                 factor(year),
               data = data2)


print(model.y5)


# Compute cluster-robust standard errors

robust_se_m5 <- vcovCR(model.m5, cluster = data$leader_id, type = "CR2")
robust_se_y5 <- vcovCR(model.y5, cluster = data$leader_id, type = "CR2")


# Summarize models with robust standard errors

summary_m5 <- coeftest(model.m5, vcov = robust_se_m5)
summary_y5 <- coeftest(model.y5, vcov = robust_se_y5)

print(summary_m5)
print(summary_y5)


# Perform mediation analysis

model.out5 <- mediate(model.m5,
                      model.y5,
                      treat = "oda_constant_pc_pos_log_lag",
                      mediator = "milex_cpi_pc_log",
                      boot = F,
                      sims = 2500)


# Summary of the mediation analysis (Table A9)

summary(model.out5)

# Table A9 in main paper has been edited manually in Overleaf according to the output from summary(model.out5)


# Export Table A9 as HTML file (it doesn't export other formats)

modelsummary(model.out5, stars = T, output = "Table A9.html")



## Robustness checks: FDI --------------------------------------------------------

### FDI: negative values ------------------------------------------------------


fdi_negative_m1 <- feols(fml = personalism ~ fdi_cpi_pc_neg_log_lag + 
                           gdppc_log_lag + 
                           gdppc_growth_lag + 
                           population_log_lag + 
                           oilpc_log_lag + 
                           civilwar_lag + 
                           leader_duration_log_lag | leader_id + year,
                         data = data,
                         cluster = "leader_id",
                         panel.id = c("leader_id", "year"))

print(fdi_negative_m1) 


# Table A10: FDI per capita and personalism, 1970-2010. Including negative FDI values #

# Note 1: that this function might change the sign of the coefficients when they are smaller that -0.0004 (aprox).
# Note 2: We then edit the output in Overleaf.

etable(fdi_negative_m1, 
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "FDI per capita and personalism, 1970-2010. Including negative FDI values")


# Export Table A10 (PNG)

etable(fdi_negative_m1, 
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "FDI per capita and personalism, 1970-2010. Including negative FDI values",
       export = "Table A10.png")




### FDI: Modelling time (polynomials and 1WFE) --------------------------------


# We create first the time trend for FDI and the polynomials #

data_fdi <- data[!(data$year < 1970),]

data_fdi <- data_fdi %>%
  mutate(time_trend_fdi = case_when(year == 1970 ~ 0,
                                    year == 1971 ~ 1,
                                    year == 1972 ~ 2,
                                    year == 1973 ~ 3,
                                    year == 1974 ~ 4,
                                    year == 1975 ~ 5,
                                    year == 1976 ~ 6,
                                    year == 1977 ~ 7,
                                    year == 1978 ~ 8,
                                    year == 1979 ~ 9,
                                    year == 1980 ~ 10,
                                    year == 1981 ~ 11,
                                    year == 1982 ~ 12,
                                    year == 1983 ~ 13,
                                    year == 1984 ~ 14,
                                    year == 1985 ~ 15,
                                    year == 1986 ~ 16,
                                    year == 1987 ~ 17,
                                    year == 1988 ~ 18,
                                    year == 1989 ~ 19,
                                    year == 1990 ~ 20,
                                    year == 1991 ~ 21,
                                    year == 1992 ~ 22,
                                    year == 1993 ~ 23,
                                    year == 1994 ~ 24,
                                    year == 1995 ~ 25,
                                    year == 1996 ~ 26,
                                    year == 1997 ~ 27,
                                    year == 1998 ~ 28,
                                    year == 1999 ~ 29,
                                    year == 2000 ~ 30,
                                    year == 2001 ~ 31,
                                    year == 2002 ~ 32,
                                    year == 2003 ~ 33,
                                    year == 2004 ~ 34,
                                    year == 2005 ~ 35,
                                    year == 2006 ~ 36,
                                    year == 2007 ~ 37,
                                    year == 2008 ~ 38,
                                    year == 2009 ~ 39,
                                    year == 2010 ~ 40),
         time_trend_fdi2 = time_trend_fdi^2,
         time_trend_fdi3 = time_trend_fdi^3)



fdi_poly_naive_m1 <- feols(fml = personalism ~ fdi_cpi_pc_pos_log_lag + 
                             time_trend_fdi + 
                             time_trend_fdi2 + 
                             time_trend_fdi3 | leader_id,
                           data = data_fdi,
                           cluster = "leader_id",
                           panel.id = c("leader_id", "year"))

print(fdi_poly_naive_m1)



fdi_poly_controls_m2 <- feols(fml = personalism ~ fdi_cpi_pc_pos_log_lag + 
                                gdppc_log_lag + 
                                gdppc_growth_lag + 
                                population_log_lag + 
                                oilpc_log_lag + 
                                civilwar_lag + 
                                leader_duration_log_lag + 
                                time_trend_fdi + 
                                time_trend_fdi2 + 
                                time_trend_fdi3 | leader_id,
                              data = data_fdi,
                              cluster = "leader_id",
                              panel.id = c("leader_id", "year"))

print(fdi_poly_controls_m2)


fdi_owfe_naive_m3 <- feols(fml = personalism ~ fdi_cpi_pc_pos_log_lag | leader_id,
                           data = data,
                           cluster = "leader_id",
                           panel.id = c("leader_id", "year"))

print(fdi_owfe_naive_m3)


fdi_owfe_controls_m4 <- feols(fml = personalism ~ fdi_cpi_pc_pos_log_lag + 
                                gdppc_log_lag + 
                                gdppc_growth_lag + 
                                population_log_lag + 
                                oilpc_log_lag + 
                                civilwar_lag + 
                                leader_duration_log_lag | leader_id,
                              data = data,
                              cluster = "leader_id",
                              panel.id = c("leader_id", "year"))


# Table A11: FDI per capita and personalism, 1970-2010. Modelling time #

# Note 1: that this function might change the sign of the coefficients when they are smaller that -0.0004 (aprox).
# Note 2: We then edit the output in Overleaf.

etable(fdi_poly_naive_m1, fdi_poly_controls_m2,
       fdi_owfe_naive_m3, fdi_owfe_controls_m4,
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "FDI per capita and personalism, 1970-2010. Modelling time")


# export Table A11 (PNG)

etable(fdi_poly_naive_m1, fdi_poly_controls_m2,
       fdi_owfe_naive_m3, fdi_owfe_controls_m4,
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "FDI per capita and personalism, 1970-2010. Modelling time",
       export = "Table A11.png")



### FDI: Current USD ---------------------------------------------------------


fdi_current_m1 <- feols(fml = personalism ~ fdi_current_pc_pos_log_lag + 
                          gdppc_log_lag + 
                          gdppc_growth_lag + 
                          population_log_lag + 
                          oilpc_log_lag + 
                          civilwar_lag + 
                          leader_duration_log_lag | leader_id + year,
                        data = data,
                        cluster = "leader_id",
                        panel.id = c("leader_id", "year"))

print(fdi_current_m1)



fdi_current_negative_m2 <- feols(fml = personalism ~ fdi_current_pc_neg_log_lag +
                                   gdppc_log_lag + 
                                   gdppc_growth_lag + 
                                   population_log_lag + 
                                   oilpc_log_lag + 
                                   civilwar_lag + 
                                   leader_duration_log_lag | leader_id + year,
                                 data = data,
                                 cluster = "leader_id",
                                 panel.id = c("leader_id", "year"))

print(fdi_current_negative_m2)


# Table A12: FDI per capita and personalism, 1970-2010. FDI in current USD #

# Note 1: that this function might change the sign of the coefficients when they are smaller that -0.0004 (aprox).
# Note 2: We then edit the output in Overleaf.

etable(fdi_current_m1, 
       fdi_current_negative_m2, 
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "FDI per capita and personalism, 1970-2010. FDI in current USD")


# Export Table A12 (PNG)

etable(fdi_current_m1, 
       fdi_current_negative_m2, 
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "FDI per capita and personalism, 1970-2010. FDI in current USD",
       export = "Table A12.png")



### FDI: Cube root -----------------------------------------------------------


fdi_cube_m1 <- feols(fml = personalism ~ fdi_cpi_pc_cub_lag + 
                       gdppc_log_lag + 
                       gdppc_growth_lag + 
                       population_log_lag + 
                       oilpc_log_lag + 
                       civilwar_lag + 
                       leader_duration_log_lag | leader_id + year,
                     data = data,
                     cluster = "leader_id",
                     panel.id = c("leader_id", "year"))

print(fdi_cube_m1)



# Table A13: FDI per capita and personalism, 1970-2010. Cube root #

# Note 1: that this function might change the sign of the coefficients when they are smaller that -0.0004 (aprox).
# Note 2: We then edit the output in Overleaf.


# The coefficient for FDI pc (cube root) produced by etable() is 0.000***.
# The real coefficient in feols object "fdi_cube_m1" is -0.000***.
# We then edit the output produced by etable() in Overleaf.

etable(fdi_cube_m1,
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "FDI per capita and personalism, 1970-2010. Cube root")


# Export Table A13 (PNG)

etable(fdi_cube_m1,
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "FDI per capita and personalism, 1970-2010. Cube root",
       export = "Table A13.png")



### FDI: % of GDP -----------------------------------------------------------


fdi_gdp_m1 <- feols(fml = personalism ~ fdi_gdp_cub_lag + 
                      gdppc_log_lag + 
                      gdppc_growth_lag + 
                      population_log_lag + 
                      oilpc_log_lag + 
                      civilwar_lag + 
                      leader_duration_log_lag | leader_id + year,
                    data = data,
                    cluster = "leader_id",
                    panel.id = c("leader_id", "year"))

print(fdi_gdp_m1)


# Table A14: FDI as percentage of GDP and personalism, 1970-2010 #

# Note 1: that this function might change the sign of the coefficients when they are smaller that -0.0004 (aprox).
# Note 2: We then edit the output in Overleaf.

etable(fdi_gdp_m1,
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "FDI as percentage of GDP and personalism, 1970-2010")


# Export Table A14 (PNG)

etable(fdi_gdp_m1,
       style.df = style.df(depvar.title = " ",
                           fixef.title = "",
                           fixef.suffix = "FE",
                           yesNo = c("Yes", "No")),
       digits.stats = 3,
       digits = "r3",
       powerBelow = -6,
       tex = T,
       title = "FDI as percentage of GDP and personalism, 1970-2010",
       export = "Table A14.png")





## Leave one out tests --------------------------------------------------------

### Aid: Leave one out test --------------------------------------------------

# Create a data frame for mapping leader_id to shortened names
shortened_names_df <- data.frame(
  leader_id = c(38, 39, 40, 70, 71, 125, 131, 132, 216, 217, 237, 242, 243, 245,
                260, 261, 263, 264, 265, 271, 272, 273, 289, 330, 331, 335, 336, 352,
                359, 393, 454, 455, 469, 473, 475, 506),  # Add all relevant IDs here
  shortened_leader_name = c("H. Aliyev", "I. Aliyiev", "M. Ahmed", "I. Khama", "S. Khama", "Yho.-Opan./Sass.-Ngue.",
                            "F. Castro", "R. Castro", "F. Duvalier", "J-C Duvalier", "Reza Pahlavi", "Rahman Aref", "Salam Aref",
                            "S. Hussein", "Doo-hwan", "Chung Hee", "A. A. as Salim", "J. al Ahmad", "J. al Ahmed",
                            "C. Saignason", "K. Phomvihane", "K. Siphandon", "A. A. Badawi", "M. Shamsher", "P. Shamsher",
                            "A. Somoza", "L. Somoza", "G. Mohammad", "M. Zia-ul-Haq", "A. bin A. Al Saud", "B. Asad", "H. Asad",
                            "L. P. Songkhram", "P. Tinsulanonda", "S. Kraprayoon", "Nguyen Van Linh")
)

data <- merge(data, shortened_names_df, by = "leader_id", all.x = TRUE)

data$shortened_leader_name[is.na(data$shortened_leader_name)] <- data$leader_name[is.na(data$shortened_leader_name)]

# Create a list of leaders
leaders <- unique(data$leader_id)

leader_names <- data %>%
  distinct(leader_id, leader_name)

# Create a table to store coefficients, standard errors, and p-values
results_aid <- data.frame(
  coef = numeric(length(leaders)),
  se = numeric(length(leaders)),
  p_value = numeric(length(leaders)),
  leader_excluded = numeric(length(leaders)),
  leader_name = character(length(leaders)),
  stringsAsFactors = F
)

# Loop for each leader
for (i_aid in seq_along(leaders)) {
  # Subset the data excluding the current leader and remove rows with NA in relevant columns
  subset_data_aid <- data %>%
    filter(leader_id != leaders[i_aid]) %>%
    drop_na(oda_constant_pc_pos_log_lag, gdppc_log_lag, gdppc_growth_lag, 
            population_log_lag, oilpc_log_lag, civilwar_lag, leader_duration_log_lag, year)
  
  # Print subset data for debugging
  print(paste("Iteration", i_aid, ": Excluding leader", leaders[i_aid], "with", nrow(subset_data_aid), "rows"))
  
  # Run fixed effects regression with feols and clustered standard errors
  model_aid <- tryCatch({
    feols(fml = personalism ~ oda_constant_pc_pos_log_lag + 
            gdppc_log_lag + 
            gdppc_growth_lag + 
            population_log_lag + 
            oilpc_log_lag + 
            civilwar_lag + 
            leader_duration_log_lag | leader_id + year,
          data = subset_data_aid,
          cluster = "leader_id",
          panel.id = c("leader_id", "year"))
  }, error = function(e) return(NULL))
  
  # Check if the model ran successfully
  if (!is.null(model_aid)) {
    # Print the model summary for debugging
    print(summary(model_aid))
    
    # Extract the coefficient, standard error, and p-value for 'oda_constant_pc_pos_log_lag'
    coefs_aid <- coef(model_aid)
    if ("oda_constant_pc_pos_log_lag" %in% names(coefs_aid)) {
      # Get the coefficient directly
      results_aid$coef[i_aid] <- coefs_aid["oda_constant_pc_pos_log_lag"]
      
      # Get the standard error and p-value
      se_aid <- summary(model_aid)$coeftable
      results_aid$se[i_aid] <- se_aid["oda_constant_pc_pos_log_lag", "Std. Error"]
      results_aid$p_value[i_aid] <- se_aid["oda_constant_pc_pos_log_lag", "Pr(>|t|)"]
    } else {
      # If no coefficient was estimated, assign NA
      print(paste("No coefficient for oda_constant_pc_pos_log_lag in iteration", i_aid))
      results_aid$coef[i_aid] <- NA
      results_aid$se[i_aid] <- NA
      results_aid$p_value[i_aid] <- NA
    }
  } else {
    # If the model failed to run, assign NA
    print(paste("Model failed in iteration", i_aid))
    results_aid$coef[i_aid] <- NA
    results_aid$se[i_aid] <- NA
    results_aid$p_value[i_aid] <- NA
  }
  
  # Record which leader was excluded
  results_aid$leader_excluded[i_aid] <- leaders[i_aid]
  
  # Record the leader name for the excluded leader
  results_aid$leader_name[i_aid] <- data$shortened_leader_name[data$leader_id == leaders[i_aid]]
}

print(results_aid)


# Figure A3: Distribution of estimated coefficients leaving one leader out at a time, foreign aid

# Plot histogram of coefficients excluding NAs

figure_a3 <- ggplot(results_aid %>% filter(!is.na(coef)), aes(x = coef)) +
  geom_histogram(bins = 50, fill = "#4c4c4c", color = "#4c4c4c") +
  theme_bw() +
  labs(title = "Distribution of estimated coefficients leaving one leader out at a time, foreign aid", x = "Coefficient", y = "Frequency")

figure_a3

ggsave(figure_a3, file = "Figure A3.pdf", width = 20, height = 10)


# Figure A4: Estimated coefficients by excluded leader, foreign aid 

# Remove NAs for plotting and sort the results by leader_excluded
plot_data_aid <- results_aid %>% filter(!is.na(coef)) %>% arrange(leader_excluded)

# Create a categorical variable for p-value ranges with clear labels
plot_data_aid <- plot_data_aid %>%
  mutate(Significance = case_when(
    p_value <= 0.01 ~ "p <= 0.01",
    p_value <= 0.05 ~ "0.01 < p <= 0.05",
    p_value <= 0.1 ~ "0.05 < p <= 0.1",
    TRUE ~ "p > 0.1"
  ))

# Ensure that the factor levels for p_category are the same for all plots
plot_data_aid$Significance <- factor(plot_data_aid$Significance, 
                                     levels = c("p <= 0.01", "0.01 < p <= 0.05", 
                                                "0.05 < p <= 0.1", "p > 0.1"))

# Create a complete dataset with all possible p_categories
complete_categories_aid <- expand.grid(leader_name = unique(plot_data_aid$leader_name),
                                       Significance = levels(plot_data_aid$Significance))

# Merge with plot_data to ensure all categories are represented
plot_data_aid <- merge(complete_categories_aid, plot_data_aid, by = c("leader_name", "Significance"), all.x = TRUE)

# Filter out rows with NA in leader_excluded
filtered_data_aid <- plot_data_aid[!is.na(plot_data_aid$coef), ]
filtered_data_aid <- plot_data_aid[!is.na(plot_data_aid$se), ]
filtered_data_aid <- plot_data_aid[!is.na(plot_data_aid$p_value), ]

# Now use the filtered data for plotting
num_leaders_aid <- nrow(filtered_data_aid)
group_size_aid <- ceiling(num_leaders_aid / 3)  # Group size to ensure equal distribution

# Create a list to store plots
plots_aid <- vector("list", 3)

# Common color palette for p_value categories
color_palette_aid <- c("p <= 0.01" = "green", 
                       "0.01 < p <= 0.05" = "blue", 
                       "0.05 < p <= 0.1" = "yellow", 
                       "p > 0.1" = "red")

for (i_aid2 in seq_len(3)) {
  start_idx_aid <- (i_aid2 - 1) * group_size_aid + 1
  end_idx_aid <- min(i_aid2 * group_size_aid, num_leaders_aid)
  
  chunk_data_aid <- filtered_data_aid[start_idx_aid:end_idx_aid, ]
  
  # Calculate y-axis limits based on coefficients and standard errors
  y_min_aid <- min(chunk_data_aid$coef - chunk_data_aid$se, na.rm = TRUE)
  y_max_aid <- max(chunk_data_aid$coef + chunk_data_aid$se, na.rm = TRUE)
  
  # Add some margin for better visibility
  y_margin_aid <- (y_max_aid - y_min_aid) * 0.1  # 10% margin
  y_min_aid <- y_min_aid - y_margin_aid
  y_max_aid <- y_max_aid + y_margin_aid
  
  # Create the plot for the current chunk with adjusted y-axis limits
  plots_aid[[i_aid2]] <- ggplot(chunk_data_aid, aes(x = factor(leader_name), y = coef, color = Significance)) +
    geom_point(size = 3) +  # Fixed size for points
    geom_errorbar(aes(ymin = coef - se, ymax = coef + se), width = 0.1, color = "black", alpha = 0.5) +  # Black error bars
    labs(title = paste("Leaders", start_idx_aid, "to", end_idx_aid), 
         x = "Leader Name (Excluded)", 
         y = "Coefficient") +
    coord_cartesian(ylim = c(y_min_aid, y_max_aid)) +  # Dynamically set y-axis limits with margin
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6),  # Smaller font size
          plot.title = element_text(hjust = 0.5)) +  # Center title
    scale_color_manual(values = color_palette_aid)  # Use common color palette
}


# Combine all plots into one with a single legend using patchwork
combined_plot_aid <- wrap_plots(plots_aid, ncol = 1) & 
  plot_layout(guides = "collect") &  # Common legend
  theme(legend.position = "bottom",  # Set legend position at the bottom
        legend.box = "horizontal")  # Horizontal legend box

# Add a common legend
combined_plot_aid <- combined_plot_aid + plot_annotation(title = "Estimated coefficients by excluded leader, foreign aid")

# Print the combined plot
print(combined_plot_aid)


ggsave(combined_plot_aid, file = "Figure_A4.pdf", width = 20, height = 10)



### FDI: Leave one out test --------------------------------------------------

# Shorter names, list of leaders and list of leader names are already created in the previous section

# Create a table to store coefficients, standard errors, and p-values
results_fdi <- data.frame(
  coef = numeric(length(leaders)),
  se = numeric(length(leaders)),
  p_value = numeric(length(leaders)),
  leader_excluded = numeric(length(leaders)),
  leader_name = character(length(leaders)),
  stringsAsFactors = F
)

# Loop for each leader
for (i_fdi in seq_along(leaders)) {
  # Subset the data excluding the current leader and remove rows with NA in relevant columns
  subset_data_fdi <- data %>%
    filter(leader_id != leaders[i_fdi]) %>%
    drop_na(fdi_cpi_pc_pos_log_lag, gdppc_log_lag, gdppc_growth_lag, 
            population_log_lag, oilpc_log_lag, civilwar_lag, leader_duration_log_lag, year)
  
  # Print subset data for debugging
  print(paste("Iteration", i_fdi, ": Excluding leader", leaders[i_fdi], "with", nrow(subset_data_fdi), "rows"))
  
  # Run fixed effects regression with feols and clustered standard errors
  model_fdi <- tryCatch({
    feols(fml = personalism ~ fdi_cpi_pc_pos_log_lag + 
            gdppc_log_lag + 
            gdppc_growth_lag + 
            population_log_lag + 
            oilpc_log_lag + 
            civilwar_lag + 
            leader_duration_log_lag | leader_id + year,
          data = subset_data_fdi,
          cluster = "leader_id",
          panel.id = c("leader_id", "year"))
  }, error = function(e) return(NULL))
  
  # Check if the model ran successfully
  if (!is.null(model_fdi)) {
    # Print the model summary for debugging
    print(summary(model_fdi))
    
    # Extract the coefficient, standard error, and p-value for 'fdi_cpi_pc_pos_log_lag'
    coefs_fdi <- coef(model_fdi)
    if ("fdi_cpi_pc_pos_log_lag" %in% names(coefs_fdi)) {
      # Get the coefficient directly
      results_fdi$coef[i_fdi] <- coefs_fdi["fdi_cpi_pc_pos_log_lag"]
      
      # Get the standard error and p-value
      se_fdi <- summary(model_fdi)$coeftable
      results_fdi$se[i_fdi] <- se_fdi["fdi_cpi_pc_pos_log_lag", "Std. Error"]
      results_fdi$p_value[i_fdi] <- se_fdi["fdi_cpi_pc_pos_log_lag", "Pr(>|t|)"]
    } else {
      # If no coefficient was estimated, assign NA
      print(paste("No coefficient for fdi_cpi_pc_pos_log_lag in iteration", i_fdi))
      results_fdi$coef[i_fdi] <- NA
      results_fdi$se[i_fdi] <- NA
      results_fdi$p_value[i_fdi] <- NA
    }
  } else {
    # If the model failed to run, assign NA
    print(paste("Model failed in iteration", i_fdi))
    results_fdi$coef[i_fdi] <- NA
    results_fdi$se[i_fdi] <- NA
    results_fdi$p_value[i_fdi] <- NA
  }
  
  # Record which leader was excluded
  results_fdi$leader_excluded[i_fdi] <- leaders[i_fdi]
  
  # Record the leader name for the excluded leader
  results_fdi$leader_name[i_fdi] <- data$shortened_leader_name[data$leader_id == leaders[i_fdi]]
}

print(results_fdi)


# Figure A5: Distribution of estimated coefficients leaving one leader out at a time, FDI

# Plot histogram of coefficients excluding NAs

figure_a5 <- ggplot(results_fdi %>% filter(!is.na(coef)), aes(x = coef)) +
  geom_histogram(bins = 50, fill = "#4c4c4c", color = "#4c4c4c") +
  theme_bw() +
  labs(title = "Distribution of estimated coefficients leaving one leader out at a time, FDI", x = "Coefficient", y = "Frequency")

figure_a5

ggsave(figure_a5, file = "Figure A5.pdf", width = 20, height = 10)


# Figure A6: Estimated coefficients by excluded leader, FDI

# Remove NAs for plotting and sort the results by leader_name
plot_data_fdi <- results_fdi %>% filter(!is.na(coef)) %>% arrange(leader_excluded)

# Create a categorical variable for p-value ranges with clear labels
plot_data_fdi <- plot_data_fdi %>%
  mutate(Significance = case_when(
    p_value <= 0.01 ~ "p <= 0.01",
    p_value <= 0.05 ~ "0.01 < p <= 0.05",
    p_value <= 0.1 ~ "0.05 < p <= 0.1",
    TRUE ~ "p > 0.1"
  ))

# Ensure that the factor levels for Significance are the same for all plots
plot_data_fdi$Significance <- factor(plot_data_fdi$Significance, 
                                     levels = c("p <= 0.01", "0.01 < p <= 0.05", 
                                                "0.05 < p <= 0.1", "p > 0.1"))

# Create a complete dataset with all possible p_categories
complete_categories_fdi <- expand.grid(leader_name = unique(plot_data_fdi$leader_name),
                                       Significance = levels(plot_data_fdi$Significance))

# Merge with plot_data to ensure all categories are represented
plot_data_fdi <- merge(complete_categories_fdi, plot_data_fdi, by = c("leader_name", "Significance"), all.x = TRUE)

# Filter out rows with NA in leader_name
filtered_data_fdi <- plot_data_fdi[!is.na(plot_data_fdi$coef), ]
filtered_data_fdi <- plot_data_fdi[!is.na(plot_data_fdi$se), ]
filtered_data_fdi <- plot_data_fdi[!is.na(plot_data_fdi$p_value), ]

# Now use the filtered data for plotting
num_leaders_fdi <- nrow(filtered_data_fdi)
group_size_fdi <- ceiling(num_leaders_fdi / 3)  # Group size to ensure equal distribution

# Create a list to store plots
plots_fdi <- vector("list", 3)

# Common color palette for p-value categories
color_palette_fdi <- c("p <= 0.01" = "green", 
                       "0.01 < p <= 0.05" = "blue", 
                       "0.05 < p <= 0.1" = "yellow", 
                       "p > 0.1" = "red")

for (i_fdi2 in seq_len(3)) {
  start_idx_fdi <- (i_fdi2 - 1) * group_size_fdi + 1
  end_idx_fdi <- min(i_fdi2 * group_size_fdi, num_leaders_fdi)
  
  chunk_data_fdi <- filtered_data_fdi[start_idx_fdi:end_idx_fdi, ]
  
  # Calculate y-axis limits based on coefficients and standard errors
  y_min_fdi <- min(chunk_data_fdi$coef - chunk_data_fdi$se, na.rm = TRUE)
  y_max_fdi <- max(chunk_data_fdi$coef + chunk_data_fdi$se, na.rm = TRUE)
  
  # Add some margin for better visibility
  y_margin_fdi <- (y_max_fdi - y_min_fdi) * 0.1  # 10% margin
  y_min_fdi <- y_min_fdi - y_margin_fdi
  y_max_fdi <- y_max_fdi + y_margin_fdi
  
  # Create the plot for the current chunk with adjusted y-axis limits
  plots_fdi[[i_fdi2]] <- ggplot(chunk_data_fdi, aes(x = factor(leader_name), y = coef, color = Significance)) +
    geom_point(size = 3) +  # Fixed size for points
    geom_errorbar(aes(ymin = coef - se, ymax = coef + se), width = 0.1, color = "black", alpha = 0.5) +  # Black error bars
    labs(title = paste("Leaders", start_idx_fdi, "to", end_idx_fdi), 
         x = "Leader Name (Excluded)", 
         y = "Coefficient") +
    coord_cartesian(ylim = c(y_min_fdi, y_max_fdi)) +  # Dynamically set y-axis limits with margin
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6),  # Smaller font size
          plot.title = element_text(hjust = 0.5)) +  # Center title
    scale_color_manual(values = color_palette_fdi)  # Use common color palette
}

# Combine all plots into one with a single legend using patchwork
combined_plot_fdi <- wrap_plots(plots_fdi, ncol = 1) & 
  plot_layout(guides = "collect") &  # Common legend
  theme(legend.position = "bottom",  # Set legend position at the bottom
        legend.box = "horizontal")  # Horizontal legend box

# Add a common legend
combined_plot_fdi <- combined_plot_fdi + plot_annotation(title = "Estimated coefficients by excluded leader, FDI")

# Print the combined plot
print(combined_plot_fdi)


ggsave(combined_plot_fdi, file = "Figure_A6.pdf", width = 20, height = 10)



## Appendix ends here ---------------------------------------------------------


#save.image(file = "Replication_PSRM_Aid_FDI_Personalism.RData")


# The end --------------------------------------------------------------------

sink()